#include "definitions.h"

!     Damped-density algorithm

      subroutine damdid(nconso)
      write(nconso,*) '@(#)damden.F	1.58 7/1/99'
      return
      end

      subroutine damden(&
#          include H_TIMES_B_ARGS
! Array dimensions and constants/flags
           ,nsymax,nkpnum,&
           idebug,mmaxx,&
! integer
           nmsfft,nmstyp,icharg,nlnum,mmax,&
           nkpibz,nkpunf,nplwkp,lpctx,lpcty,&
           lpctz,ndcbyt,ireset,iscxc,&
! double precision
           tolerance, resmax, nextend,&
           wfkine,rdensr,rho_rad,rho_core,dirdat,rdnsum,&
           vxc,dnlg0,&
           gkp123,vkpt,wtkpt,sizham,deriv,eigen,&
           eigold,rlog,psp,phir2v,radius,phiatm,&
           forwrk,fnleif,ffield,occ,vnl,&
           vext,&
! complex*WF_PRECISION
           cptwfp,&
! complex*16
           cvion,&
! logical
           lkpinv,lkpnew,ldonkp,ldelin, lstop,&
! character
           wffile,&
           wfsetu,timer,dip0,extfie,dipmix,endipc,&
           zdip,enalp,enpot,encons,lworkp,extpot,idipol,width,occmix,&
           enfrec,entrpy,smethod,eneig,ndiapb,enion,entot,&
           enkin,envnl,enewa,lmastr,numsym,ocvar,&
           enxcc,enhac,nxc,excdat,xcfdat,xcpdat,enxc,&
           n,lupdks,lupdio,lupdch,lupdsu,lupdki,lupdnl,nconso,&
           recc,enmax,nelect,efermi,reci,&
           g2max,nplwk0,luns2s,&
           ldenrd,noffs0,damconv,lspsi,rmagmo,&
           magic)

      use van_us_data_module
      use module_rmm_diss
      use us_hpsi_module
      use blochlmodule
      use external_pot_module
      use density_mixing_module
#ifdef PARAL 
      use par_functions_module
#endif PARAL

      implicit none
#include H_TIMES_B_DECL

      integer nsymax,nkpnum
      integer idebug,maxovl,ncut,nener, mmaxx

! nmsfft(r,ns): symmetry operation indices

      integer nmsfft(nplwv,max(1,nsymax))
      integer nmstyp(max(1,nsymax))

! nkpibz: The k-point within the IBZ that represent this k-point
! nkpunf: The point group operation, that brings this k-point into the IBZ

      integer icharg(nspec)
      integer nlnum(nspec)
      integer mmax(nspec)

      integer nkpibz(nkpnum)
      integer nkpunf(3,3,nkpnum)
      integer nplwkp(nkprun)


      integer lpctx(ngx),lpcty(ngy),lpctz(ngz)

      integer ndcbyt, magic
      integer ireset,iscxc

      complex*WF_PRECISION cptwfp(nrplwv,nbands,*)  ! nrplwv,nbands,nkpmem


 
! D O U B L E  P R E C I S I O N

! rdensr(r):  real space charge density
! dirdat(g): 1/g**2 array
! rdnsum(.): work array

      real*8 wfkine(nbands,nkprun)
      real*8 rdensr(nplwv,*)
      real*8 dirdat(nplwv)
      real*8 rdnsum(nplwv,nspin)
      real*8 vxc(*)
      real*8 dnlg0(*)

! gkp123: Coordinates of the k-point in G1,G2,G3-units

      real*8 gkp123(3,nkpnum)
      real*8 vkpt  (3,nkprun)
      real*8 wtkpt   (nkprun)
      real*8 sizham  (nkprun)
      real*8 deriv   (nkprun)
      real*8 eigen(nbands,nkprun)
      real*8 eigold(nbands,nkprun)

      real*8 rlog  (nspec)
      real*8 psp(npspts,nspec)

!     rho_rad : fourier transform of partialcore charge
      real*8 rho_rad(npspts,nspec)
!     rho_core: partialcore charge on realspace grid
      real*8 rho_core(nplwv)

      real*8 phir2v(mmaxx)
      real*8 radius(mmaxx,nspec)
      real*8 phiatm(mmaxx,0:2,nspec)

      real*8 forwrk(3,nions)
      real*8 fnleif(3,nions,nspec)
      real*8 ffield(3,nions,nspec)

      real*8 occ   (nbands,nkprun)
      real*8 vnl   (nbands,nkprun)

      real*8 vext  (ngz)
      real*8 endcpl

! C O M P L E X * 1 6

      complex*16 cvion(nplwv)

! L O G I C A L

! lkpinv: True if the nkpunf has been added the inversion without this
!         being present in the space group

      logical*4 lkpinv(nkpnum)
      logical*4 lkpnew(nkprun)
      logical*4 ldonkp(nkprun)
      logical*4 ldelin
      logical*4 lstop
      logical*4 lspsi

! C H A R A C T E R 

      character*(*) wffile


      character*64 wfsetu

      integer nxc, idipol, ndiapb, nextend
      real*8  nelect
      integer nconso, numsym, n, smethod
      integer nplwk0,noffs0(nkprun)
      real*8 timer(*)
      real*8 tolerance, resmax
      real*8 dip0, extfie, dipmix, g2max
      real*8 endipc,zdip,enalp,enpot,encons, extpot
      real*8 width,occmix,enfrec, entrpy, eneig
      real*8 enion, entot, enkin,envnl, enewa, enmax, enxc
      real*8 ocvar,enxcc,enhac, damconv
      real*8 recc(3,3), efermi, reci(3,3)
      real*8 excdat(nxc),xcfdat(nxc),xcpdat(nxc)
      real*8 rmagmo

!     Local vars
#ifdef PARAL
!     Make a full length work array
      complex*WF_PRECISION cwork(nrplwv_global)
      logical exists
#endif

! Arrays over g-vectors below kinetic energy cutoff
! For parallel program full size arrays
      logical*4 lupdks,lupdio,lupdch,lupdsu,lupdki,lupdnl
      logical*4 luns2s,ldenrd
      logical*4 lmastr
      logical*4 lworkp
      logical*4 lbacwf
      complex*16 cdum, csum
      real*8 rinplw,A00(2),h
      real*8 gzx,gzy,gzz,gyx,gyy,gyz,gxx,gxy,gxz,gx,gy,gz,g2,Ginv
      integer nsp,nat,nkpeff,m,nr,nn,mu,nb,ispin
      integer nz,ny,nx,i,j
      real*8 det(2),dirmin,dirmax,q1sq,amp
#include "etime.h"
      real*8 occsetup(nbands,nkprun)
      real*8    rwork1(nbands*nkprun) 
      logical*4 linit
      integer nmax,info,norder
      integer idim,iwork,resmindim
      data    linit /.true./
      integer ibacwf
      parameter(ibacwf=0)
      real*8    get_eigen_value_convergence
      external  get_eigen_value_convergence                         
      save    linit

      if (magic .ne. 12345678) then
        write(nconso,*) 'wrong damden magic value = ', magic
        write(nconso,*) 'probably the argument list is bad'
        call uflush(nconso)
        call clexit (nconso)
      endif

!     Zero the non-local force array
      fnleif = 0.0d0

      call uttime(time)
      timer(TDAMDEN)=timer(TDAMDEN)-time(1)

      if ((lupdio).or.linit) then
!         cvion(r) = sum [vps*cstrf]
          call potion(ngx,ngy,ngz,nions,nionsp,nspec,nplwv,posion,&
           lpctx,lpcty,lpctz,npspts,recc,volc,psp,psgmax,icharg,cvion)
          call extadd(ngx,ngy,ngz,cvion,vext,1.0d0)

!         get partialcore charge on realspace grid
          call partialcore(ngx,ngy,ngz,nions,nionsp,nspec,nplwv,posion,&
           lpctx,lpcty,lpctz,npspts,recc,volc,rho_rad,psgmax,rho_core,&
           mmax,cwork1)
 
          if (lgenpp) then
!           calc vkb for all k-points
            do nkp = 1,nkprun
               if (ldonkp(nkp)) then
!                Calculate the beta(G) (vkb) for this k-point
                 call cal_vkb (nplwv,nrplwv,nrplwv_global,        &
                    nplwkp(nkp),nkp,nions, nspec, nionsp,nkpmem,  &
                    nbands,nkprun,dnlg(1,1,nkp),posion,dirc,volc, &
                    ngxs,ngys,ngzs,g2max,idebug,recc & 
#ifdef PARAL
                     , &  
#include             PARAL_ARGS
#endif
                    )
               endif
            enddo
          endif
          call density_mix_init(ngdens_max,ngx,ngy,ngz,nspin,&
                            lpctx,lpcty,lpctz,dirdat,g2max,recc,&
                            ipwpadG,lmastr)                 
      endif
   
      if (linit.or.lnonselfconsistent) then 
!        Initialize rdnsum=rdensr for first iteration
         do i = 1,nplwv
           do ispin = 1,nspin
             rdnsum(i,ispin) = rdensr(i,ispin)
           enddo
         enddo
         linit = .false.
      endif

! Now : rdnsum contains the output density from last call of resmin/subproj
!       rdensr contains the input density from  last call of resmin/subproj 

        call density_mixing(rdnsum,rdensr,ngx,ngy,ngz,nspin,damconv)

! -------------------------------------------------------------------------
! Copy new density rdensr from master node to all slaves
! This is a temporary solution done to ensure the excact same density on all nodes 
! (small difference is currently accumulated in density_mixing module)
! -------------------------------------------------------------------------
#ifdef PARAL
!       send damconv too all slaves
        call ms_send_real8 (nconso, damconv)

        if (.not.lmastr) then 
          do ispin = 1,nspin 
            rdensr(1:nplwv,ispin) = 0.0d0
          enddo 
        endif
        do ispin = 1,nspin 
            call par_sum_double('A',rdensr(1,ispin),rdensr(1,ispin),&
                                nplwv,nplwv,1,&
#include                        PARAL_ARGS
                                ,timer)
        enddo
#endif
        call symadd (nconso, rdensr, rdensr, nplwv,&
            cwork1,cwork1,nmsfft, numsym, nmstyp, .FALSE., 0,nspin,&
#ifdef PARAL
#include      PARAL_ARGS
              ,&
#endif PARAL
              timer )

!----------------------------------------------------------------------------

! Perform the dipole correction (Store external potential with cvion)
         call uttime(time)
         timer(TDIPOLE)=timer(TDIPOLE)-time(1)
         call dipole(nconso,ngx,ngy,ngz,cveff,rdensr,dirc,&
              nspec,nionsp,icharg,nions,posion,idipol,vext,volc,ffield,&
              endipc,zdip,efermi,cwork1,nplwv,enxcc,enxc,&
              excdat,xcfdat,xcpdat,nxc,&
              dip0,extfie,dipmix,lworkp,extpot,&
              recc,lpctx,lpcty,lpctz,vxc,&
              nspin,dirdat,cvion,idebug,lmastr)
         call uttime(time)
         timer(TDIPOLE)=timer(TDIPOLE)+time(1)
         encons=enalp+endipc

! added by jross 27/3-2001
         
         call blochl(volc,nions,dirc,recc,rdensr,posion,&
              ngx,ngy,ngz,nspec,nionsp,nelect,nconso,&
              lpctx,lpcty,lpctz,icharg,nplwv,nspin,&
              damconv,ffield,deltav,endcpl,timer,lupdio,& 
              lmastr)
         
         encons = encons+endcpl 
         call extern_pot(ngx,ngy,ngz,dirc,nions,&
              nspec,nionsp,posion,&
              icharg,g,ngdens_max,ipwpadG,&
              deltav,endipc,ffield,nconso)
         
         lupdio=.false.

! calculate Hartree and exchange-correlation energies and potentials
         call uttime(time)
         timer(TTIDYUP)=timer(TTIDYUP)-time(1)
         call tidyup(nplwv,npwxc,rdensr,rho_core,&
                 enhac,dirdat,cveff,volc,enxcc,&
                 enxc,excdat,xcfdat,xcpdat,nxc,cvion,&
                 recc,lpctx,lpcty,lpctz,vxc,dnlg0,cwork1,&
                 iscxc,nspin,ngx,ngy,ngz,ngxs,ngys,ngzs,&
                 cveff_soft,deltav,&
                 ngdens_soft,nrplwv,ipwpadG_soft,nconso,idebug,timer)
         call uttime(time)
         timer(TTIDYUP)=timer(TTIDYUP)+time(1)

! cveff(r) is now (hartree + xc + ion)-potential for the
!                                                 kohn-sham-equation
!        get new deeq matrix for cveff
         if (lspsi) then
            write(nconso,*) 'damden: setup new d-matrix'
            call uflush(nconso)
            call uttime(time)
            timer(TNEWD)=timer(TNEWD)-time(1)
            call newd(nions,nspec,nionsp,nplwv,nrplwv,nrplwv_global,&
              nbands,nkprun,ngx,ngy,ngz,posion,dirc,volc,cveff,nspin,&
              idebug,nconso&
#ifdef PARAL
             ,&
#include PARAL_ARGS
#endif
             ,timer)
            call uttime(time)
            timer(TNEWD)=timer(TNEWD)+time(1)
         endif

! write out some terms of the total energy
         if (lmastr.and.n.eq.1) then
            call sumpot(enpot,nplwv*nspin,rdensr,cveff,nspin)
            entot=eneig+enhac+enxcc+enewa+encons
            enion=0.0d0
            write(nconso,2605) 'TR2',entot+entrpy,enkin,&
              enpot,enhac,enxcc,envnl,enewa,enalp,eneig,&
                 enkin+enpot+envnl,-(enpot+envnl),-enhac,enxc,enion,&
                 entot+enfrec,entot,endipc
            call uflush(nconso)
 2605       format(1x,a3,17f14.6)
         endif


!        k-point loop

         call uflush(nconso)
         do 2200 nkp=1,nkprun
           if (.not. ldonkp(nkp)) goto 2200

! Read in the nkp'th k-point arrays - and let nkpeff point to the 
! appropiate addresses in already read arrays

           call  wfswap_get(cptwfp,ndcbyt,nkpeff,&
#include                   "apply_h_args.h"
                            )        

! Perform the energy minimisation of the electronic system using the
! damped density residual minimization technique

            call uttime(time)
            timer(TRESMIN)=timer(TRESMIN)-time(1)

!     Diagonalize the Hamiltonian approximately by an iterative method,
!     finding the lowest eigenvalues using an initial guess for
!     the eigenvectors.
 
!     Most of the arguments of diag_h are defined in the
!     header-file apply_h_args.h, except that some arrays
!     are indexed by (nkp).
      if (idebug.gt.1) then
          do i = 1,nbands
           write(nconso,*) 'damden resmin',i,nkp,eigen(i,nkp)
          enddo
          call uflush(nconso)
      endif
 
!     For the eigenvalue (generalized if lspsi=true)  problem:
      if (ieigsolver==3) then 
       call rmm_diss(cptwfp(1,1,nkpeff),nplwkp(nkp),eigen(1,nkp),&
#include "apply_h_args.h"
                     ,timer) 

! Find the kinetic energy.
      call sum_kin_erg(cptwfp,nrplwv,nrplwv_global,nplwkp,nbands,&
                       pwkine(1,nkp),wfkine&
#ifdef PARAL
                       ,&
#include PARAL_ARGS
#endif
                 ,nconso,timer )

      elseif (ieigsolver==2) then 

!      use the davidson algorithm
       norder = 0

       call diag_h (lspsi, tolerance, resmax, nextend,&
        ndiapb, norder, nplwkp(nkp),&
        eigen(1,nkp), cptwfp(1,1,nkpeff), &
        wfkine(1,nkp), vnl(1,nkp),entot,damconv,&
#       include H_TIMES_B_ARGS
        , nconso, timer)

      elseif (ieigsolver==1) then 

!       setup a hamilton matrix for the current set of wavefunction
!       write it to the output netcdf file
        call gen_hamiltonian_matrix(cptwfp(1,1,nkpeff),      &
                                         nplwkp(nkp),&
#include "apply_h_args.h"
                                        ,timer)

      endif

      call uttime(time)
      timer(TRESMIN)=timer(TRESMIN)+time(1)

      call wfswap_put(cptwfp,nkp,nrplwv,nbands,nkpmem,nkprun,ndcbyt) 

!  k-point loop end here

 2200    continue  


#ifdef PARAL
!     Slaves send non-local energies,
!     eigenvalues,  sizham,deriv back to master.
      call smkinl (nconso, ldonkp, wfkine,&
        vnl, eigen, eigold, occ, sizham, deriv, &
        nbands, nkprun,&
#include PARAL_ARGS
        )
#endif PARAL

! =========================================================================
!     for a non selfconsistent calculation return the convergence of the eigen
!      values instead of the convergence of the density (damconv)
      if (lnonselfconsistent) then
        damconv = get_eigen_value_convergence(eigen,nbands,nkprun)
      endif                  
! ==========================================================================
!     Construct the real-space output density from isubproj/resmin in rdnsum

!     first set occupation numbers (occ)
      if (lmastr) then 
        call setoccupation(nbands,nkprun,occ,wtkpt,eigen,efermi,&
             width,enfrec,entrpy,smethod,nelect,ocvar,nspin,rmagmo)
      endif
#ifdef PARAL
!     Multicast efermi, eigenvalues and occupancies to all slaves
      call mseigo (nconso, efermi, eigen, eigold, occ,&
        nbands, nkprun)
#endif PARAL
      if (idebug.gt.1) then 
        do i = 1,nbands
         write(nconso,*) 'damden occ',i,(occ(i,nkp),nkp=1,nkprun)
        enddo
      endif

!     init rdnsum to zero before k-point loop
      rdnsum = 0.0d0

!     New k-point loop starts here to construct the output density
      do 2300 nkp=1,nkprun
           if (.not. ldonkp(nkp)) goto 2300
#ifdef PARAL 
           n_global = nplwkp(nkp)
#endif

! Read in the nkp'th k-point arrays - and let nkpeff point to the
! appropiate addresses in already read arrays

           call  wfswap_get(cptwfp,ndcbyt,nkpeff,&
#include                   "apply_h_args.h"
                            )                         

      if (idebug.gt.1) then
          do i = 1,nbands
           write(nconso,*) 'damden eig',i,nkp,eigen(i,nkp),occ(i,nkp)
          enddo
          call uflush(nconso)
          write(nconso,*)  'damden',ngx,ngy,ngz,ngxs,ngys,ngzs
          call uflush(nconso)               
      endif
      

!     Construct the real-space electronic charge density
      call chdtr(nbands,nplwv,nrplwv,nrplwv_global,nkprun,wtkpt(nkp),&
#ifdef PARAL
#include PARAL_ARGS
              ,&
#endif
              cptwfp(1,1,nkpeff),rdnsum,ngx,ngy,ngz,&
              ngxs,ngys,ngzs,cwork1,&
              ipwpad(1,nkp),nplwkp(nkp),occ(1,nkp),&
              nspin,kspin(nkp), timer,nconso)

                                   
!     (now: rdnsum(r) contains the sum of contributions to the real-space
!     charge-density from k-points treated up till now)                    

!========================================================================
!     add ultra-soft density
        if (lspsi) then
!        First calc <beta|psi> overlabs for all bands
         if (ieigsolver.ne.3) then   ! for rmm-diis becp is uptodate
           call calc_all_becp(nplwv,nrplwv,nrplwv_global,nplwkp(nkp), &
                nkp,nions, nspec, nionsp, cptwfp(1,1,nkpeff),&
                nbands,nkprun,nkpmem,dnlg(1,1,nkp),posion,dirc,volc,&
                ngxs,ngys,ngzs,ipwpad(1,nkp)&
#ifdef PARAL
                ,&
#include        PARAL_ARGS
#endif PARAL
                ,timer)
         endif

!        add density
         call uttime(time)
         timer(TADDUSDENS)=timer(TADDUSDENS)-time(1)
         call addusdens(&
              nplwv,nrplwv,nrplwv_global,nions,nspec,nionsp,nbands,&
              nkprun, wtkpt, nkp, occ,&
              efermi,eigen, rdnsum(1,kspin(nkp)),&
              ngx,ngy,ngz,posion,dirc,volc,nconso&
#ifdef PARAL
             ,&
#include PARAL_ARGS
#endif
             ,timer)
         call uttime(time)
         timer(TADDUSDENS)=timer(TADDUSDENS)+time(1)
 
!       at this point calculate the contribution from the non-local
!       pseudo potential (only if density is resonable well 
!       converged).
        if ((damconv.lt.0.5d0).or.(idebug.gt.5)) then 
          call  force_us(&
            fnleif, eigen, occ,wtkpt,nplwkp(nkp),cptwfp(1,1,nkpeff),&
#           include "apply_h_args.h"
            ,timer,nconso )
        endif
 
      endif  ! if (lspsi)
!==========================================================================
 
 2300    continue

!=================== k-point loop ends here ===================
!     Symmetrize charge density rdnsum(r).
         call symadd (nconso, rdnsum, rdnsum, nplwv,&
            cveff,cwork1,nmsfft, numsym, nmstyp, .FALSE., 0,nspin,&
#ifdef PARAL
#include      PARAL_ARGS
              ,&
#endif PARAL
              timer )

! now: rdnsum(r) contains the new charge density in   r e a l  space

#ifdef PARAL
!     sum fnleif so it is awailable on all nodes
      m = 3*nions*nspec
      call mssum (fnleif, fnleif,m,m,1,&
#include PARAL_ARGS
         ,nconso,timer )
#endif PARAL

      call uttime(time)
      timer(TDAMDEN)=timer(TDAMDEN)+time(1)

      return
      end


!==============================================================================
      subroutine calc_all_becp(nplwv,nrplwv, nrplwv_global,nplwkp, nkp,&
                         nions, nspec, nionsp, cptwfp,&
                         nbands,nkprun,nkpmem,dnlg,posion,dirc,volc,&
                         ngxs,ngys,ngzs,ipwpad&
#ifdef PARAL
                         ,&
#include PARAL_ARGS
#endif PARAL
                         ,timer )
 
!==============================================================================
!     update  <beta|psi> overlaps for all bands for the new set of wavefunctions
!     cptwfp for this k-point
 
      use van_us_data_module
      use us_hpsi_module
      implicit none
 
      integer    nplwv,nrplwv,nrplwv_global,nplwkp,nkp
#ifdef PARAL
#include PARAL_DECL
      real*8     dnlg (nrplwv_global,3)
#else
      real*8     dnlg (nrplwv,3)
#endif
 
      integer    nions,nspec,nionsp(nspec)
      integer    nbands,nkprun,nkpmem
      real*8     posion(3,nions,nspec),dirc(3,3),volc
      integer    ngxs,ngys,ngzs,ipwpad(nrplwv)
      complex*WF_PRECISION cptwfp(nrplwv,nbands)
      complex*16 ZDOTC
      external   ZDOTC
 
      real*8     timer(*)
 
!     locals
      integer nn


!     Zero the betas initially
      becp(:,:,nkp) = 0.0d0
 
      do 200 nn = 1,nbands
 
!         use offset into skb and vkb
          call cal_bec(nplwv,nrplwv,nrplwv_global,nplwkp,nkp,nn,        &
           nions,nspec,nionsp,nbands,nkprun,dnlg(1,1),posion,dirc,volc, &
           ngxs,ngys,ngzs,ipwpad,nkpmem                                 & 
#ifdef PARAL
           ,&
#include PARAL_ARGS
#endif
              ,timer,reci_psi=cptwfp(1,nn))
 
200      continue
 
 
      return
      end

