!     ===========================================
!     This program calculates the ion-ion monopole
!     interaction energy and force corrections
!     using the ewald summation technich 
!     The madelung matrix is calculated explicit
!     and used for ewald energy and force calculations
!     ===========================================

      module Madelung_module

!     locals 
      integer, save, private :: nlmax,ngmax
      real*8, save, private :: lamda,rmax,gmax,ws
      real*8, save, pointer :: gvector(:,:),lvector(:,:) ! 3,ngmax and 3,nlmax
      logical, save, private :: init = .true.

      contains
      subroutine ewald_madl(posion,volc,nions,recc,          & 
           dirc,nspec,nionsp,ngx,ngy,ngz,lpctx,lpcty,lpctz,  & 
           vmadl,ew_force,ew_energy,nconso,icharge,partial,  &  
           calculate_stress )


      implicit none
      
!     input
      integer ngx,ngy,ngz,lpctx(ngx),lpcty(ngy),lpctz(ngz)
      integer nions,nspec,nionsp(nspec),nconso
      real*8 recc(3,3),dirc(3,3),posion(3,nions,nspec),volc
      integer ,optional  :: icharge(nions)
      real*8 ,optional   :: partial(nions)
      logical*4,optional :: calculate_stress

!     locals
      real*8 for(3,nions,nions,nions),rions(3,nions),pi,x

!     output
      real*8 vmadl(nions,nions),ew_force(3,nions),ew_energy

      pi = acos(-1.d0)


      if(init)then
         call ewaldpar(pi,nions,volc,ws,lamda,rmax,gmax)

         call g_and_lvector(gmax,rmax,recc,ngx,ngy,ngz,lpctx,lpcty,&
              lpctz,dirc,lvector,nlmax,ngmax,gvector,volc)
         init = .false.
      endif

      call posion2rions(posion,dirc,nspec,&
           nionsp,nions,rions)
      
      call madelung(rmax,rions,nions,lamda,lvector&
           ,gvector,nlmax,ngmax,volc,pi,vmadl,for)
      
      call force_energy_madl(nions,icharge,partial,vmadl, & 
           for,ew_force,ew_energy)


!     calculate stress on unitcell from ion-ion interaction
      if (present(calculate_stress)) then 
         call madelung_stress(rmax,rions,nions,lamda,lvector & 
           ,gvector,nlmax,ngmax,volc,pi,icharge) 
      endif
        
      end subroutine ewald_madl
 
!     ==========================================
!     Define the ewald parameter lamda
!     ==========================================
      subroutine ewaldpar(pi,nions,volc,ws,lamda,rmax,gmax)


      implicit none
      integer nions
      real*8 alphamax,lamda,betamax,rmax,gmax
      real*8 pi,ws,volc

      parameter(alphamax = 60.d0)              
!     alpha is determined by the convergens of erfc(alpha)

      parameter(betamax = 60.d0)                 
!     beta is determined by the convergens of exp(betasq)/betasq

!     The Wigner-Seitz radius 
      ws=((3.d0/4.d0)*volc/pi/nions)**(1.d0/3.d0)

!     The Ewald parameter lamda
      lamda = 1.d0/ws*(3.d0/4.d0*pi**2.d0)**(1.d0/6.d0)

!     The maximum lenghts of the g- and l-vectors
      rmax = alphamax/lamda
      gmax = 2.d0*lamda*betamax
      
      return 

      end subroutine ewaldpar
!     ========================================
!     ========================================
!     Here is the g-vector and lattice vectors (l-vector)
!     used in the ewald summation found
!     ========================================

      subroutine g_and_lvector(gmax,rmax,recc,ngx,ngy,ngz,&
           lpctx,lpcty,lpctz,dirc,lvector,&
           nlmax,ngmax,gvector,volc)

!     ==========================================

 
      implicit none

!     input
      integer ngx,ngy,ngz,lpctx(ngx),lpcty(ngy),lpctz(ngz) 
      real*8 recc(3,3),dirc(3,3),volc,rmax,gmax

!     output
      integer ngmax,nlmax
      real*8,pointer :: lvector(:,:) !3,nlmax
      real*8,pointer :: gvector(:,:) !3,ngmax

!     local use only
      integer n,nn,ng,h,i,j,k,num,a,b,c,con,axbxc,nnn
      real*8 gxx,gxy,gxz,gyx,gyy,gyz,gzx,gzy,gzz
      real*8 gx,gy,gz,sqgx,sqgy,sqgz,norm
      real*8 normbas(3)
      real*8 a1xa3(3),a1xa2(3),a2xa3(3)
      real*8 num1,num2,num3,norma1xa2,norma1xa3,norma2xa3
      real*8 numa,numb,numc
      real*8 l(3,3),lx,ly,lz,norml
      real*8, allocatable ::gwork(:,:) ! 3,axbxc

!     ==========================================
!     in reciprocal space find the g-vectors with
!     norm less than gmax
!     ===========================================

      nnn = ngx*ngy*ngz
      allocate(gwork(3,nnn))
      ng = 0
      do h = 1,ngx
         gxx = recc(1,1)*lpctx(h)
         gxy = recc(1,2)*lpctx(h)
         gxz = recc(1,3)*lpctx(h)
         do i = 1,ngy
            gyx = recc(2,1)*lpcty(i)
            gyy = recc(2,2)*lpcty(i)
            gyz = recc(2,3)*lpcty(i)
            do j = 1,ngz
               gzx = recc(3,1)*lpctz(j)
               gzy = recc(3,2)*lpctz(j)
               gzz = recc(3,3)*lpctz(j)
               gx = gxx + gyx + gzx
               gy = gxy + gyy + gzy
               gz = gxz + gyz + gzz 
               sqgx = (gxx + gyx + gzx)**2
               sqgy = (gxy + gyy + gzy)**2
               sqgz = (gxz + gyz + gzz)**2
               norm = sqgx + sqgy +sqgz
               if (norm .lt. gmax .and. norm .gt. 0.d0) then
                  ng = ng + 1
                  gwork(1,ng) = gx ! the g-vectors of relavance
                  gwork(2,ng) = gy
                  gwork(3,ng) = gz
               endif 
            enddo
         enddo
      enddo
      ngmax = ng
      allocate(gvector(3,ngmax))
      do i = 1,ngmax
         gvector(:,i)=gwork(:,i)
      enddo
      deallocate(gwork)

!     ===========================================================
!     Find the basis-vectors and calculate the needed 
!     max number of reapated supercells (num) in real space given 
!     rcutoff. Notice that min(num) = 1
!     ===========================================================

      do i =1,3
         normbas(i) = sqrt(sum(dirc(i,:)*dirc(i,:)))
      enddo
!     The cross product of the basis vectors
      a1xa3(1) = dirc(1,2)*dirc(3,3)-dirc(1,3)*dirc(3,2)
      a1xa3(2) = dirc(1,3)*dirc(3,1)-dirc(1,1)*dirc(3,3)
      a1xa3(3) = dirc(1,1)*dirc(3,2)-dirc(1,2)*dirc(3,1)
      a1xa2(1) = dirc(1,2)*dirc(2,3)-dirc(1,3)*dirc(2,2)
      a1xa2(2) = dirc(1,3)*dirc(2,1)-dirc(1,1)*dirc(2,3)
      a1xa2(3) = dirc(1,1)*dirc(2,2)-dirc(1,2)*dirc(2,1)
      a2xa3(1) = dirc(2,2)*dirc(3,3)-dirc(2,3)*dirc(3,2)
      a2xa3(2) = dirc(2,3)*dirc(3,1)-dirc(2,1)*dirc(3,3)
      a2xa3(3) = dirc(2,1)*dirc(3,2)-dirc(2,2)*dirc(3,1)
      norma2xa3 = sqrt(sum(a2xa3(:)*a2xa3(:)))
      norma1xa3 = sqrt(sum(a1xa3(:)*a1xa3(:)))
      norma1xa2 = sqrt(sum(a1xa2(:)*a1xa2(:)))
!     The number of repeated supercells in the 3 directions
      num1 = rmax*norma2xa3/volc
      num2 = rmax*norma1xa3/volc 
      num3 = rmax*norma1xa2/volc
      numa = max(1,int(num1+1))
      numb = max(1,int(num2+1))
      numc = max(1,int(num3+1))

      nlmax = (2*numa+1)*(2*numb+1)*(2*numc+1)
      allocate (lvector(3,nlmax))
      con = 1
      do i = -numa,numa
         a = i
         do j = -numb,numb
            b = j 
            do h = -numc,numc
               c = h
               if(a == 0 .and. b == 0 .and. c == 0)cycle
               con = con+1
               l(1,:) = dirc(1,:) * a
               l(2,:) = dirc(2,:) * b
               l(3,:) = dirc(3,:) * c
               lx = l(1,1) + l(2,1) + l(3,1)
               ly = l(1,2) + l(2,2) + l(3,2)
               lz = l(1,3) + l(2,3) + l(3,3)
               lvector(1,con) = lx
               lvector(2,con) = ly
               lvector(3,con) = lz
            enddo
         enddo
      enddo
      
      
    
      lvector(:,1) = 0.d0
      
      return 
      end subroutine g_and_lvector


!     ==========================================
!     get xyz-positions of the ions in rions(3,ion)     
!     ===========================================

      subroutine posion2rions(posion,dirc,nspec,nionsp,&
           nions,rions)


      implicit none
      
      integer nspec,nions,nionsp(nions),n,i,j
      real*8 dirc(3,3),posion(3,nions,nspec),rions(3,nions)
      real*8 rion(3)

      
      n = 0
      do i = 1,nspec
         do j = 1,nionsp(i)
            n = 1 + n
            call xyzpos(posion(1,j,i),dirc,rion)
            rions(:,n) = rion(:)
         enddo
      enddo

      return
      end subroutine posion2rions





!     =======================================================
!     This subroutine calculates the madelung matrix
!     this is based on a subroutine made by prof. H. Skriver. 
!     =======================================================
      subroutine madelung(rmax,rions,nions,lamda,lvector&
           ,gvector,nlmax,ngmax,volc,pi,vmadl,for)

      implicit none

!     input
      integer nions,ngmax,nlmax
      real*8 rions(3,nions),lamda,rmax,pi,volc
      real*8 lvector(3,nlmax),gvector(3,ngmax)
 
!     output 
      real*8 vmadl(nions,nions),for(3,nions,nions)

!     local use only
      integer h,i,j,k,ni,hp
      real*8 sumog1,sumog2,sumor1,sumor2,gnorm
      real*8 sum1,sum2,sum3,sum4,lnorm,distr1r2(3)
      real*8 betasq,alpha,erfca,derfca,distsq,dist(3)
      real*8 gdotr,expb,factor,normdist
      real*8 sumogde(3),sumorde(3)
      external erfc
      real*8 erfc
!     ==========================================
!
!     The sum over G space for R1=R2 and l NE 0
!     ==========================================
      factor = pi/volc/(lamda**2.d0)
            

      sumog1=0.d0
      do i=1,ngmax
         gnorm = sqrt(sum(gvector(:,i)*gvector(:,i)))
         betasq=(gnorm**2.d0)/4.d0/(lamda**2.d0)
         sumog1=sumog1+exp(-betasq)/betasq
      enddo

      sum1=factor*(sumog1-1.0d0)

!     ==========================================
!     The sum over R space for R1=R2 and l NE 0
!     ==========================================

      sumor1=0.d0
      do i=2,nlmax
         lnorm = sqrt(sum(lvector(:,i)*lvector(:,i)))
         if(lnorm .gt. rmax)cycle
         alpha = lamda*lnorm    ! no units
         erfca = erfc(alpha)    ! the complementary erro function
!         write(*,*) 'erfc_alpha',erfca,alpha
         sumor1 = sumor1+erfca/lnorm
      enddo

!      write(*,*) 'sumor1',sumor1

      sum2=sumor1-2.d0*lamda/sqrt(pi)

      do i=1,nions
         vmadl(i,i)=sum1+sum2
!         write(*,*) 'vmadl1',vmadl(i,i)
      enddo
      

!     =======================================================
!     The contribution from interactions between different atoms
!     in real and g-space R1#R2 and g#0 
!     ======================================================
     
       if(nions .gt. 1)then 
         
         
         ni = nions - 1         ! notice that h and i never are equal 
         do h= 1,ni
            hp = h+1
            do i = hp,nions
!     sum over real-space R1 # R2
               sumor2 = 0.d0
               sumorde = 0.d0
               do j = 1,nlmax
                  dist(:) = rions(:,h)-rions(:,i)+lvector(:,j)
                  normdist = sqrt(sum(dist(:)*dist(:)))
                  if(normdist .lt. rmax .and. &
                       normdist .gt. 0.d0)then
                     alpha = lamda*normdist ! no units

!     Madelung matrix element used to calculate ENERGY
                     erfca = erfc(alpha) ! the com er func
                     sumor2 = sumor2+erfca/normdist

!     The derivative of Madelung matrix used to calculate FORCE
                     derfca=-2.d0*exp(-(alpha**2.d0))/sqrt(pi)
                     sumorde(:)=sumorde(:)+(lamda*derfca/normdist&
                          -erfca/normdist**2.d0)*dist(:)/normdist
                  endif
               enddo

!     sum over g-space R1 # R2
               sumog2 = 0.d0
               sumogde = 0.d0
               distr1r2(:) = rions(:,h)-rions(:,i)
               do j = 1,ngmax
                  gdotr = sum(gvector(:,j)*distr1r2(:))
                  gnorm = sqrt(sum(gvector(:,j)*gvector(:,j)))
                  betasq = (gnorm**2.d0)/4.d0/(lamda**2.d0)  
                  expb = exp(-betasq)
!     Madelung matrix element
                  sumog2 = sumog2 + expb/betasq*cos(gdotr)

!     The derivative of Madelung matrix
                  sumogde(:) = sumogde(:)-expb/betasq*sin(gdotr)&
                       *gvector(:,j)
               enddo
               for(:,i,h)=sumorde(:)+(factor*sumogde(:))
               vmadl(i,h)=sumor2+factor*(sumog2-1.d0)
!               write(*,*)'v_i#h',vmadl(i,h)
            enddo
         enddo
      endif
      
   
!     Calculate the total Madelung matrix using the mirror symmetry
      
      do h = 1,nions
         do i = h,nions
            vmadl(h,i) = vmadl(i,h)
            if(h == i)cycle
            for(:,h,i)=-for(:,i,h)
         enddo
      enddo

      end subroutine madelung



!     ===============================================
!     Calculate the Ewald force and energy form the 
!     madelung matrix
!     ===============================================

      subroutine force_energy_madl(nions,icharge,partial,vmadl,&
           for,ew_force,ew_energy)

      use basicdata
!     input
      integer nions
      real*8 vmadl(nions,nions)
      real*8 for(3,nions,nions)
      integer ,optional :: icharge(nions)
      real*8 ,optional :: partial(nions)
!     output
      real*8 ew_force(3,nions),ew_energy

!     locals
      integer k,h,i
      real*8 force(3,nions),energy

!     FORCE

      do k = 1,nions
         force(:,k)=0.d0
         do h = 1,nions
            if(h == k) cycle
            if(present(icharge)) then
               force(:,k) = force(:,k)+icharge(k)*&
                    icharge(h)*for(:,k,h)
            endif
            if(present(partial)) then
               force(:,k) = force(:,k)+partial(k)*&
                    partial(h)*for(:,k,h)
            endif
         enddo 
         ew_force(:,k) = edeps/(4.0d0*pi)*force(:,k)
      enddo
      

!     ENERGY
      energy=0.d0
      do h = 1,nions
         do i = 1,nions
            if(present(icharge)) then
            energy = icharge(i)*icharge(h)*vmadl(i,h)+energy
            endif
            if(present(partial)) then
            energy = partial(i)*partial(h)*vmadl(i,h)+energy
            endif
         enddo
      enddo
      ew_energy = 0.5d0*edeps/(4.0d0*pi)*energy
      
      end subroutine force_energy_madl




!     =======================================================
!     This subroutine calculates the stress on the unitcell 
!     from the ion-ion interaction. 
!     see eq. B2, O.H.Niesen et al, PRB 32, page 3792

!        u(x1,x1) = dEwald/de  
!         
!         = factor*sum(G) fac1G(G)*fac2G(Ri,G)*
!               [2*Gx*Gy/G^2*(G^2/4*lamda + 1) - u(x1,x2)]
!         
!           + 1/2*sqrt(lamda)*sum(RiRjT) ZiZj H'(sqrt(lamda)*D)*
!                                        Dx1*Dx2/D^2,   D = Ri-Rj+T =/ 0  
!
!           + factor * (sum(Ri)Zi)**2 u(x1,x2)
!
!
      subroutine madelung_stress(rmax,rions,nions,lamda1,lvector & 
           ,gvector,nlmax,ngmax,volc,pi,icharge)

      use basicdata, only : edeps
      use stress_module
      implicit none

!     input
      integer, intent(in) ::  nions,ngmax,nlmax
      real*8,  intent(in) ::  rions(3,nions)
      real*8,  intent(in) ::  lamda1              ! 1/A, locally in this subroutine we use 
                                                  ! lamda as lamda1**2, 1/A^2 
      real*8,  intent(in) ::  rmax,pi,volc
      real*8, intent(in)  ::  lvector(3,nlmax),gvector(3,ngmax)
      integer, intent(in) ::  icharge(nions)
 

!     locals
      integer     :: ng,na,index,Ri,Rj,T
      real*8      :: factor,fourlamda,gsqr,gsqr_d_4lamda,fac1G,h
      real*8      :: gdotr,gx1,gx2,Dx1,Dx2,zizr,sumZ,Dnorm
      real*8      :: sqrtl,D(3)
      complex*16  :: fac2G
      real*8      :: str(6)
      real*8      :: lamda
      external erfc
      real*8 erfc

!     zero stress
      str(1:6) = 0.0d0

      lamda = lamda1**2

      factor     = pi/(2.0d0*volc*lamda)
      fourlamda  = 4.0d0*lamda

!     ==========================================
!     The sum over G space 
!     ==========================================
      do ng=1,ngmax


         gsqr = sum(gvector(1:3,ng)*gvector(1:3,ng))
         gsqr_d_4lamda = gsqr/fourlamda

         fac1G = exp(-gsqr/fourlamda)/gsqr_d_4lamda

!        loop over atoms to make fac2G (sum(Ri) Zi*exp(iG*Ri))
         fac2G = 0.0d0 
         do na = 1,nions 
           gdotr = sum(gvector(1:3,ng)*rions(1:3,na))
           fac2G = fac2G + dble(icharge(na))*exp((0.0d0,1.0d0)*gdotr)
         enddo 
         fac2G = fac2G*conjg(fac2G)
         
         do index = 1,6 
            gx1 = gvector(x1_index(index),ng)
            gx2 = gvector(x2_index(index),ng)
            h = 2*gx1*gx2/gsqr *(gsqr_d_4lamda+1.0d0)
            if (x1_index(index).eq.x2_index(index)) h = h - 1.0d0

!           collect result for loop over G space
            str(index) = str(index) + factor*fac1G*fac2G*h
         enddo

      enddo

!     ==========================================
!     The sum over R space
!     ==========================================
      sqrtl = sqrt(lamda)
!     loop of Ri and Rj
      do Ri = 1,nions 
        do Rj = 1,nions 

          zizr = dble(icharge(Ri)*icharge(Rj)) 
!         loop over lattice translation verctors
          do T = 1,nlmax
             D(1:3) = rions(1:3,Ri)-rions(1:3,Rj)+lvector(1:3,T) 
             Dnorm  = sqrt(sum(D(1:3)**2))

             if ((Dnorm.gt.rmax).or.(Dnorm.lt.10d-20)) cycle

             do index = 1,6 

               Dx1 = D(x1_index(index))
               Dx2 = D(x2_index(index))

               str(index) = str(index) +  & 
                   0.5d0*sqrtl*zizr*H1(sqrtl*Dnorm)*Dx1*Dx2/(Dnorm**2)

             enddo  ! index 

          enddo

        enddo ! Rj 
      enddo   ! Ri 
      write(6,*) 'strR = ',str

      sumZ = 0.0d0 
      do Ri = 1,nions 
        sumZ = sumZ + dble(icharge(Ri))
      enddo 

      do index = 1,6 
        if (x1_index(index).eq.x2_index(index)) then 
          str(index) = str(index) + factor*sumZ**2
        endif 
      enddo 
      write(6,*) 'strT = ',str

!     fold str out into the stress matrix 
      stress_all%ion_ion = 0.0d0 
      do index = 1,6 
        stress_all%ion_ion(x1_index(index),x2_index(index))=str(index) 
        stress_all%ion_ion(x2_index(index),x1_index(index))=str(index) 
      enddo 
      stress_all%ion_ion = edeps/(4.0d0*pi)*stress_all%ion_ion/volc

      call write_stress(stress_all%ion_ion,'ewald') 
        
      contains

! =======================================================================
      function H1(x) 
!     define H' function Eq. B2  d(erfc(x)/dx) = 2/sqrt(pi) *exp(-x^2) 
      real*8 H1
      real*8 x
         H1 = -2.0d0/sqrt(pi)*exp(-x**2) -erfc(x)/x
      end function H1
!========================================================================
      
   
      end subroutine madelung_stress


      end module Madelung_module
