#include "definitions.h"

      subroutine applyhid(nconso)
      write(nconso,*) '@(#)applyh.F	1.18 7/1/99'
      return
      end

      subroutine apply_H(psi,hpsi,spsi,nplwkp,block_size,nbn,&
                         lcalc_only_h_m_vnlpsi,&
#       include "apply_h_args.h"
        , timer)
      use van_us_data_module
      use us_hpsi_module
      use non_local_projectors
      use run_context
#ifdef PARAL
      use par_functions_module
#endif PARAL
      implicit none

! Calculate the product of the Hamiltonian operator and a wave
! function cptwfp in reciprocal space. 
!
! Operation of apply_h : 
!         do n = 1,block_size 
!             call par_getwf(cptwfp,psi,block_size,...)   ! get full length wavefunction in psi(G) 
!               (V_kin)|psi(G)> 
!               psi(G) -> psi(R)
!               (V_eff*psi)(R) 
!               (Vnl  *psi(R)                      ! for realspace non-local operators
!               (V_eff*psi)(R) ->V_eff*psi)(G)
!               hpsi(G) = V_kin)|psi(G)> + V_eff*psi)(G) 
!             par_getback(hpsi,hpsi)                      ! distribute full length array back on nodes 
!          enddo 
!          do n = 1,block_size 
!             hpsi(G) = hpsi(G) + Vln|psi(G)       ! for reciprocal-space non-local operators
!             spsi(G) = S|psi(G)>
!          enddo
!
!          psi  (input)         : block of reciprocal wavevectors
!          hpsi (output)        : Hamiltonian operator times psi
!          spsi (output)        : generalized overlab operator S times psi  
!          nplwkp (input)       : length of reciprocal wavevectors
!          block_size (input)   : size of block
!          nbn        (input)   : first element of blocks corressponding number in 
!                                 1..nbands arrays (used for reusing becp overlap 
!                                 matrices
!          lcalc_only_h_m_vnlps : if true only calculated (H-Vnl)|psi>
!                                 for used with the RMM-DIIS algorithm


!               
!     Argument declarations
      integer nplwkp,block_size
#     include "apply_h_decl.h"
      complex*WF_PRECISION  psi (nrplwv,block_size) 
      complex*WF_PRECISION  spsi(nrplwv,*) 
      complex*WF_PRECISION  hpsi(nrplwv,block_size) 
      complex*WF_PRECISION  psiwork(nrplwv_global)
      complex*WF_PRECISION  hpsiwork(nrplwv_global) 
      
      integer   nbn
      logical*4 lcalc_only_h_m_vnlpsi
      real*8 timer(*)
     
!     Local vars
      complex*16 ctemp
      real*8 rinplw, wfnorm
#include "etime.h"
      real*4 NORM2   
      integer m, ispin,nb
      complex*16            :: psiR(ngxs*ngys*ngzs)
      complex*16            :: vpsi(nplwkp)
      integer   nplwv_soft
      logical*4             :: exists,lhpsi

      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)-time(1)

!     soft grid size
      nplwv_soft = ngxs*ngys*ngzs   

      do nb = 1,block_size      

#ifdef PARAL
        call par_getwf(psi,nrplwv,nb,block_size,par_pw_np,&
#include PARAL_ARGS
              , psiwork, exists, timer, nconso)          
#else 
        psiwork(:) = psi(:,nb) 
        exists = .true.
#endif
        if (exists) then 
          call uttime(time)
          timer(TPOTXWF)=timer(TPOTXWF)-time(1)
          psiR(:) = 0.0d0
!    fold the wavefunction into the 3D-FFT grid
          do m=1,nplwkp
              psiR(ipwpad(m,nkp))=psiwork(m)
          enddo
 
!    transform the wavefunction into real space
          call fft3d(psiR,ngxs,ngys,ngzs,1)
          call uttime(time)
          timer(TPOTXWF)=timer(TPOTXWF)+time(1)

!    multiply wavefunction*potential
          ispin = kspin(nkp)
          do m=1,nplwv_soft
             cwork1(m)=cveff_soft(m,ispin)*psiR(m)
          enddo   

!    calculate the contribution from the non-local part of the potential.
!    for reciprocal space operators
          if ((luse_rs_proj).and.(.not.lcalc_only_h_m_vnlpsi)) then 
!           add in real space V_nl|psi> to cwork1
            call usvnlpsi(&
              spsi,nplwkp,.true.,.true.,.true.,nbn+nb-1,&
#include     "apply_h_args.h"
             ,timer,real_psi=psiR,real_vpsi=cwork1)
          endif

!    transform (wavefunction*potential) into reciprocal space
          call uttime(time)
          timer(TPOTXWF)=timer(TPOTXWF)-time(1)
          call fft3d(cwork1,ngxs,ngys,ngzs,-1)
          call uttime(time)
          timer(TPOTXWF)=timer(TPOTXWF)+time(1)

!    add the kinetic energy term to obtain H*psi in hpsiwork
          rinplw = 1.0d0/dble(nplwv_soft)
          vnlband = 0.0d0 ! not implemented at the moment
          wkintmp = 0.0d0
          wfnorm = 0.0d0
          call uttime(time)
          timer(TXC1)=timer(TXC1)-time(1)               
          do m = 1,nplwkp
             ctemp = psiwork(m) * pwkine(m,nkp)
             wkintmp = wkintmp + dble(conjg(psiwork(m)) * ctemp)
             wfnorm = wfnorm + dble(conjg(psiwork(m)) * psiwork(m))
             hpsiwork(m)= rinplw*(cwork1(ipwpad(m,nkp)))+ctemp 
          enddo

!    normalize the kinetic energy
          wkintmp = wkintmp / wfnorm

        endif  ! if exists

!    transform hpsiwork back on nodes to hpsi
#ifdef PARAL 
          call par_getback(hpsiwork,nb,block_size,par_pw_np,&
#include PARAL_ARGS
                          ,hpsi,nrplwv,exists,timer,nconso)
#else 
        hpsi(1:nplwkp,nb) = hpsiwork(1:nplwkp)
#endif

      enddo     ! nb = 1,block_size
     
! calculate the contribution from the non-local part of the potential,
! reciprocal space and overlap matrix S :
!     hpsi = Vnl|cptwfp>
!     spsi =   S|cptwfp>

      if (.not.lcalc_only_h_m_vnlpsi) then 
        lhpsi =  (.not.(luse_rs_proj))
        do nb = 1,block_size
!         add in reciprocal space V_NL|psi> to vpsi
          call usvnlpsi(&
            spsi(1,nb),nplwkp,lhpsi,.true.,.true.,nbn+nb-1,&
#include   "apply_h_args.h"
            ,timer,reci_psi=psi(1,nb),reci_vpsi=hpsi(1,nb))                    
        enddo
      endif   

#ifdef DEBUG2
      write(*,*) 'applyH: ',lgenpp
      write(*,*) ' wkintmp ',wkintmp
      write(*,*) ' vnlpsi    ',vpsi(2)
      write(*,*) ' cveff     ',hpsiwork(1)
#endif DEBUG2
                                                    
      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)+time(1)
!     Count the number of calls
      timer(NAPPLYH)=timer(NAPPLYH) + 1

      return
      end

! ----------------------------------------------------------------------------
      subroutine pre_precon (nplwkp, psi, lgenev,&
#       include "apply_h_args.h"
        , timer)
      use van_us_data_module
      use run_context
#ifdef PARAL
      use par_functions_module
#endif PARAL

!     Calculate the band kinetic energy wkintmp, to be used in precon

!     nplwkp ..... Length of the vectors
!     psi ........ Input: wavefunction (for calculating the kinetic energy)
!     spsi ....... Input: S*psi (generalized eigenproblem)
!     lgenev ..... Input: .TRUE. for the generalized eigenproblem
!     timer ...... Program timers.

      implicit none
!     Argument declarations
      integer nplwkp
#     include "apply_h_decl.h"
      complex*WF_PRECISION psi(nrplwv)
      logical lgenev
      real*8 timer(*)

!     Local vars
      real*8 x, wfnorm,wfnorma(1),wkintmpa(1)
      integer m,nplwkp_local,offs
#include "etime.h"
#ifdef PARAL 
      integer nlocal(par_pw_np),offset(par_pw_np)
#endif

      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)-time(1)

#ifdef SERIAL
      nplwkp_local = nplwkp
      offs = 0
#else
      call par_defwfk (nrplwv,nrplwv_global,nplwkp,nlocal,offset,&
                       nplwkp_local,&
#include PARAL_ARGS
        , nconso)
      offs         = offset(par_process+1)
#endif                     

 
!     Calculate wkintmp: The kinetic energy of this band

      wkintmp = 0.0d0
      wfnorm = 0.0d0
      do m = 1,nplwkp_local
         x = conjg(psi(m)) * psi(m)
         wkintmp = wkintmp + x * pwkine(m+offs,nkp)
         wfnorm = wfnorm + x
      end do

#ifdef PARAL 
!     Make global sum of partial kinetic energies and norm
      wkintmpa(1) = wkintmp
      wfnorma(1) = wfnorm 
      call par_sum_double ('A',wkintmpa,wkintmpa,1,1,1,&
#include PARAL_ARGS
          ,timer )
      call par_sum_double ('A',wfnorma,wfnorma,1,1,1,&
#include PARAL_ARGS
          ,timer )
      wkintmp = wkintmpa(1)
      wfnorm  = wfnorma(1)
#endif                                 

!     Normalize the kinetic band energy
      wkintmp = wkintmp / wfnorm

!     Result of this subroutine is "wkintmp" being returned

      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)+time(1)
#ifdef DEBUG2
      write(*,*) 'pre_precon  ',nkp,wkintmp,wfnorm
#endif DEBUG2

      return
      end


! ---------------------------------------------------------------
      subroutine precon (nplwkp, psi, resid, e,&
#       include "apply_h_args.h"
        , timer, info)
      use van_us_data_module
      use run_context

!     Precondition the residual: psi = K * resid

!     nplwkp ..... Length of the vectors
!     psi ........ Output: preconditioned vector
!     resid ...... Input residual vector
!     e .......... Input: eigenvalue (unused in the present algorithm)
!     timer ...... Program timers.
!     info ....... Return code (0 for success).
   
#ifdef PARAL
      use par_functions_module
#endif PARAL
      implicit none
!     Argument declarations
      integer nplwkp, info
      complex*WF_PRECISION resid(nplwkp), psi(nplwkp)
      real*8 e
      logical lgenev
#     include "apply_h_decl.h"
      real*8 timer(*)

!     Local vars
      real*8 entemp, x, pcnum, kgg,dtemp,dtempa(1)
      integer m,nplwkp_local,offs
#include "etime.h"
      real*R_PRECISION stemp,NORM2
#ifdef PARAL
      integer nlocal(par_pw_np),offset(par_pw_np)
#endif

      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)-time(1)

#ifdef SERIAL
      nplwkp_local = nplwkp
      offs = 0
#else
      call par_defwfk (nrplwv,nrplwv_global,nplwkp,nlocal,offset,&
                       nplwkp_local,&
#include PARAL_ARGS
        , nconso)
      offs         = offset(par_process+1)
#endif    
 
      if (wkintmp .lt.1.0d0) then
        entemp = 1.0d0
      else
        entemp = 1.0d0 / wkintmp 
      endif
#ifdef DEBUG2
      write(*,*) 'precon wkintmp ',wkintmp
#endif DEBUG2

! The rational polynomial kgg(x) has the properties:
! for x->0 approaches 1 with zero 1st, 2nd, 3rd order derivatives
! for x->inf approaches 1/[2(x-1)] correct to to 4th order in 1/x
! (see Payne, Teter, Allen, Rev.Mod.Phys. vol.64 (1992) p.1073).

!VOCL LOOP,TEMP (x,pcnum,kgg)
      do 100 m = 1,nplwkp_local
        x = pwkine(m+offs,nkp) * entemp
        pcnum = 27.0d0 + (18.0d0 + (12.0d0 + 8.0d0 * x) * x) * x
        kgg = pcnum / (pcnum + 16.0d0 * (x * x)**2)
        psi(m) = kgg * resid(m)
100   continue
#ifdef DEBUG2
      write(*,*) 'precon pwkin ',nkp,pwkine(10,nkp)
#endif DEBUG2

!      normalize psi
       dtemp=NORM2(nplwkp_local,psi,1)
       dtemp = dtemp**2

#ifdef PARAL
!     Make global sum of partial norms of psi
      dtempa(1) = dtemp
      call par_sum_double ('A',dtempa,dtempa,1,1,1,&
#include PARAL_ARGS
          ,timer )
      dtemp = dtempa(1)
#endif      

      stemp = 1.d0/sqrt(dtemp)
      call BLAS_RSCAL(nplwkp_local,stemp,psi,1)                             

      info = 0

      call uttime(time)
      timer(TAPPLYH)=timer(TAPPLYH)+time(1)

      return
      end
