#include "definitions.h"
    module  density_mixing_module
!=====================================================================
!    
!=====================================================================
!   module data
    integer,save,private                  :: ichsto,noldrd,nrplwv,ientry
    complex*16 , allocatable,save,private :: Rm(:,:)
    integer,allocatable,save,private      :: i2wpad(:)
    real*8, allocatable,save,private      :: onedivGsqr(:)
    complex*16, allocatable,save,private  :: cdelr(:,:,:)    ! nrplwv,nspin,noldrd
    complex*16, allocatable,save,private  :: cdelp(:,:)      ! nrplwv*nspin,noldrd
    complex*16, allocatable,save,private  :: cdelpgr(:,:,:)  ! nrplwv,nspin,noldrd
    character*20,save,private             :: density_mixing_method   ! (Pulay/GRPulay)
    real*8,save,private                   :: q1sq,alpha
    real*8,save,private,allocatable       :: denmix(:)       ! nrplvw (Kerker mixing scheme)
    integer,save,private                  :: ngdens_max_densmix
    integer, allocatable,save,private     :: ipwpadG_densmix(:,:)    ! ngdens_max,0:3)       
    integer,save,private                  :: number_mixing_iterations, number_harris_iterations
    real*8,save,private                   :: gvector_factor
    logical*4,save                        :: lnonselfconsistent
!=====================================================================
    

!##################################################################### 
    contains
!#####################################################################
    subroutine density_mixing(in_density,out_density,ngx,ngy,ngz,nspin,damconv)
    implicit none 
    integer,intent(in)   :: ngx,ngy,ngz,nspin
    real*8,intent(in)    :: in_density (ngx*ngy*ngz,nspin)  
    real*8,intent(inout) :: out_density(ngx*ngy*ngz,nspin) 
    real*8,intent(out)   :: damconv

!   check if we shall do anything
    if (number_harris_iterations>0) then 
      if (number_mixing_iterations<number_harris_iterations) then 
        number_mixing_iterations = number_mixing_iterations + 1
        return 
      endif
      lnonselfconsistent = .false.
    else 
      if (lnonselfconsistent) return 
    endif 
      
    number_mixing_iterations = number_mixing_iterations + 1

    if (density_mixing_method=='Pulay') then 
      call Pulay(in_density,out_density,ngx,ngy,ngz,nspin,damconv)
    else 
      call GRPulay(in_density,out_density,ngx,ngy,ngz,nspin,damconv)
    endif

    end subroutine density_mixing

!#####################################################################
    subroutine Pulay(in_density,out_density,ngx,ngy,ngz,nspin,damconv)
    use netcdfinterface
    use run_context
    use matrix_utilities
    implicit none
!=====================================================================
    integer,intent(in)   :: ngx,ngy,ngz,nspin
    real*8,intent(in)    :: in_density (ngx*ngy*ngz,nspin)  
    real*8,intent(inout) :: out_density(ngx*ngy*ngz,nspin) 
    real*8,intent(out)   :: damconv
!--------------------------------------------------------------------- 
!   locals 
    complex*16,allocatable   :: in_reci(:,:),out_reci(:,:)
    complex*16,allocatable   :: ctmpres(:,:)
    complex*16,allocatable,save :: residual(:,:)
    complex*16               :: tmpvec(noldrd)
    integer                  :: nmax,irow,i,j,m,ispin,info,idebug=0,nplwv,nr
    logical                  :: lcoldel=.false.
    real*8                   :: rcond = 1.0d-6
    real*8                   :: h,A00(nspin)
    complex*16,allocatable   :: cwork1(:)
    complex*16, pointer      :: in_density_reci(:,:),out_density_reci(:,:)
    complex*16               :: zzero = (0.0d0,0.0d0),zone=(1.0d0,0.0d0)
    external DZNRM2
    real*8 DZNRM2          
    complex*16               :: sum
!=======================================================================
   irow=nrplwv*nspin

   allocate(in_reci(nrplwv,nspin),out_reci(nrplwv,nspin))
   allocate(ctmpres(nrplwv,nspin))
   if (.not.allocated(residual)) allocate(residual(nrplwv,nspin))
   nplwv = ngx*ngy*ngz 
   allocate(out_density_reci(nplwv,nspin))
   allocate(cwork1(nplwv),in_density_reci(nplwv,nspin))

!  transform the charge densities to reciprocal space
!  the output charge density, "output_density"
   call real2reci(ngx,ngy,ngz,nspin,out_density,out_reci,i2wpad,nrplwv,  &
                  out_density=out_density_reci )
                  
!  the input charge density, "input_density"
   call real2reci(ngx,ngy,ngz,nspin,in_density,in_reci,i2wpad,nrplwv, &
                  out_density=in_density_reci)

!  linear mix of high fourier components 
!  in_density_reci(1:nplwv,1:nspin) = (1.0d0-alpha)*in_density_reci(1:nplwv,1:nspin) & 
!                                        +alpha*out_density_reci(1:nplwv,1:nspin)
! ------------------------------------------------------------------
! - Pulay Mixing using  QR factorization/ Steinn Gudmondson 
!
! after the 1st call, we can construct the residual charge density
  if (ichsto == 2) then
    residual(1:nrplwv,1:nspin) = in_reci(1:nrplwv,1:nspin)-out_reci(1:nrplwv,1:nspin)
  endif
  
  nmax=min(ichsto-2,noldrd-1)

! after the 2nd call, we can construct the change in the residual
  if (ichsto.ge.3) then
    do ispin = 1,nspin
      ctmpres(1:nrplwv,ispin)=(in_reci(1:nrplwv,ispin)-out_reci(1:nrplwv,ispin)-residual(1:nrplwv,ispin))  &
                     *dsqrt(1.0d0+onedivGsqr(1:nrplwv)*q1sq)
    enddo
    residual(1:nrplwv,1:nspin) = in_reci(1:nrplwv,1:nspin)-out_reci(1:nrplwv,1:nspin)

    if (ichsto.eq.3) then
!     Insert first column into the QR factorization of cdelr
      Rm(1,1)=dcmplx(DZNRM2(irow,ctmpres,1),0.0d0)
      call ZCOPY(irow,ctmpres,1,cdelr,1)
      call ZSCAL(irow,1.0d0/Rm(1,1),cdelr,1)
    else
      if (lcoldel) then
!       Delete a column from the QR factorization of cdelr
        info=0
        call ZDELC(cdelr,nrplwv*nspin,irow,noldrd,Rm, &
               noldrd,ientry,ctmpres,info)
        if (info .ne. 0) then
          write(nconso,*) 'ZDELC returned info=', info
          call uflush(nconso)
          call clexit(nconso)
         return
        end if
      end if ! coldel

!     Insert a new column into QR factorization of cdelr
!     Use 'cwork1' as workspace, size of required array is
!     2*nspin*nrplwv+2*noldrd+1 elements
      rcond=1.0d-6
!     Q = cdelr, LDQ = nrplwv*nspin, M = irow  , N = nmax
!     R = Rm   , LDR = noldrd      , K = ientry, V = ctmpres 
!     rcond = rcond, wk = cwork1
      
      call ZINSC(cdelr,nrplwv*nspin,irow,nmax,Rm, &
             noldrd,ientry,ctmpres,rcond,cwork1,info)
      if (info .ne. 0) then
        write(nconso,*) 'ZINSC returned info=', info
        call uflush(nconso)
        call clexit(nconso)
        return
      end if
     endif   ! ichsto.eq.3

!    Augment matrix A+=[A b]=[Q,q]=[R z ; 0 p]
!    Scale the right-hand-side (residual)
     do ispin = 1,nspin
       ctmpres(1:nrplwv,ispin)=residual(1:nrplwv,ispin)*dsqrt(1.d0+onedivGsqr(1:nrplwv)*q1sq)
     enddo
     rcond=1.0d-6
     call ZINSC(cdelr,nrplwv*nspin,irow,nmax+1,Rm, &
             noldrd,nmax+1,ctmpres,rcond,cwork1,info)
     if (info .ne. 0) then
         write(nconso,*) 'ZINSC 2 returned info=', info
         call uflush(nconso)
         call clexit(nconso)
         return    
     end if
     ientry=ientry+1
     if (ientry.gt.noldrd-1) then
        ientry=ientry-(noldrd-1)
        lcoldel=.true.
     end if

     tmpvec(1:nmax)=Rm(1:nmax,nmax+1)
     if (idebug.gt.0) write(nconso,*)  &
           'damden:tmpvec ',nmax,(tmpvec(j),j=1,nmax)

!    Backsubstitution to get the weights
     if (idebug>0) then 
       do i = 1,nmax
         do j = 1,nmax
           write(nconso,*) 'damden: Rm ',i,j,Rm(i,j)
         enddo
       enddo
     endif

!    N = nmax, NRHS = 1 , A = Rm, LDA = noldrd, B = tmpvec, 
!    LDB = noldrd
     call ZTRTRS('U','N','N',nmax,1,Rm,noldrd,tmpvec,noldrd,info)
     if (info.ne.0) then
         write(nconso,*) 'ZTRTRS failed, info=', info
         call uflush(nconso)
         call clexit(nconso)
     endif

!    Compute sum[alpha_i*dr_i] (equals cdelr[Rm*alpha])
!    and store in ctmpres
     do i=1, nmax
        h = dble(tmpvec(i))
        if (idebug.gt.0) then 
           write(nconso,100) 'damden: QR weights ',i,nmax,h
100        format(1x,a30,i2,i2,f16.9)
        endif
        tmpvec(i)=dcmplx(h,0.0d0)
        cwork1(i)=tmpvec(i)
     end do
     call ZTRMV('U','N','N',nmax,Rm,noldrd,cwork1,1)
     call ZGEMV('N',irow,nmax,zone,cdelr,nrplwv*nspin,  &
                cwork1,1,zzero,ctmpres,1)

!    Compute sum[alpha_i*dp_i] and store in cwork1
     call ZGEMV('N',irow,nmax,zone,cdelp,nrplwv*nspin,  &
                tmpvec,1,zzero,cwork1,1)

  endif ! if ichsto >= 3

! write out the size of the residual
  damconv=0.0d0
  if (ichsto.ge.2) then
     do ispin=1,nspin
        A00(ispin)=0.0d0
        do m=1,nrplwv
          A00(ispin)=A00(ispin)+(residual(m,ispin)*conjg(residual(m,ispin))) &
                      *(1.d0+onedivGsqr(m)*q1sq)
        enddo            
        damconv = damconv + A00(ispin)**2
     enddo
     damconv = dsqrt(damconv)
     write(nconso,3190) nmax,(A00(ispin),ispin=1,nspin)
3190 format(1x,'DAM:',i4,2f15.6)
  endif

! Perform the actual charge density mixing
! p_{k+1}=p_{k} + G*r_{k} + sum[alpha_i*(dp_i + Gdr_i)]
  if (ichsto.ge.2) then
    j = 1
    do ispin=1,nspin
      do nr=1,nrplwv
        m=i2wpad(nr)
        if (m>1) then 
          in_density_reci(m,ispin) = out_reci(nr,ispin)
          in_density_reci(m,ispin)=in_density_reci(m,ispin)+denmix(nr)*residual(nr,ispin) !+ out_reci(:,:)
!          if ((ichsto.ge.3).and.(number_mixing_iterations>20)) then
           if (ichsto.ge.3) then
             in_density_reci(m,ispin)=in_density_reci(m,ispin)-cwork1(j)  &
               -denmix(nr)*ctmpres(nr,ispin)/dsqrt(1.d0+onedivGsqr(nr)*q1sq)
          endif
        endif
        cdelp(j,ientry)=in_density_reci(m,ispin)-out_reci(nr,ispin)
        j = j + 1
      end do

!     Fourier transform the charge density back to real space
      call fft3d(in_density_reci(1,ispin),ngx,ngy,ngz,1)
      out_density(1:nplwv,ispin)=dble(in_density_reci(1:nplwv,ispin))

    end do
  else 
     out_density(1:nplwv,1:nspin) = in_density(1:nplwv,1:nspin)
  endif
  ichsto=ichsto+1

  deallocate(in_reci,out_reci)
  deallocate(ctmpres)
  call uflush(nconso)
  deallocate(cwork1) 

  deallocate(in_density_reci,out_density_reci)

 end subroutine Pulay


!#####################################################################
    subroutine GRPulay(in_density,out_density,ngx,ngy,ngz,nspin,damconv)
    use netcdfinterface
    use run_context
    use matrix_utilities
    implicit none
!===================================================================== 
!   GRPulay: 
!   Implementation of the GR-Pulay algorithm 
!   Chem. Phys. Lett. 325 (2000) 473-476 
! 
!     GRPulay(rho_n',rho_n'')
! 
!       R_n' = rho_n'' - rho_n'
!       density_set  :  { rho_n', rho_n,rho_n-1 .. }
!       residual_set :  R_n'  : { R_n, Rn-1 ..     }
!
!       make optimal combination that minimize residual
!       rho_n+1 = rho_opt  = a1 rho_n' + a2 rho_n + a3 rho_n-1
!       rho_n+1'= rho_opt' = a1 R_n'   + a2 R_n   + a3 Rn-1
! 
!       density_set  : remove rho_n'   
!       residual_set : remove R_n'
!
!       density_set  : rho_n+1',rho_n+1 : { rho_n,rho_n-1 .. }  
!       residual_set :          Rn+1    : { R_n, Rn-1 ..     } 
!
!       return rho_n' = rho_n+1'
!         
! 
!=====================================================================
    integer,intent(in)   :: ngx,ngy,ngz,nspin
    real*8,intent(in)    :: in_density (ngx*ngy*ngz,nspin)  
    real*8,intent(inout) :: out_density(ngx*ngy*ngz,nspin) 
    real*8, intent(out)  :: damconv
!--------------------------------------------------------------------- 
!   locals 
    complex*16,allocatable   :: in_reci(:,:),out_reci(:,:)
    complex*16,allocatable   :: dresidual(:,:),residual(:,:)
    complex*16,allocatable   :: Rnopt(:,:),n_opt(:,:),n_optm(:,:)
    complex*16,allocatable,save :: Rnm(:,:),n_opt_old(:,:),Rnopt_old(:,:)
    complex*16               :: tmpvec(noldrd)
    integer                  :: nmax,i,j,m,ispin,info,idebug=4,nplwv,nr
    logical                  :: lcoldel=.false.
    real*8                   :: rcond = 1.0d-6 
    real*8                   :: h,A00(nspin)
    complex*16,allocatable   :: cwork1(:),in_density_reci(:,:)
    complex*16               :: zzero = (0.0d0,0.0d0),zone=(1.0d0,0.0d0)
    complex*16               :: sum
    real*8                   :: DZNRM2          
    external                    DZNRM2
!=======================================================================
   allocate(in_reci(nrplwv,nspin),out_reci(nrplwv,nspin))
   allocate(dresidual(nrplwv,nspin),residual(nrplwv,nspin))
   if (.not.allocated(Rnm)) allocate(Rnm(nrplwv,nspin))
   if (.not.allocated(n_opt_old)) allocate(n_opt_old(nrplwv,nspin))
   if (.not.allocated(Rnopt_old)) allocate(Rnopt_old(nrplwv,nspin))
   allocate(Rnopt(nrplwv,nspin),n_opt(nrplwv,nspin),n_optm(nrplwv,nspin))
   nplwv = ngx*ngy*ngz 
   allocate(cwork1(nplwv),in_density_reci(nplwv,nspin))

!  transform the charge densities to reciprocal space
!  the output charge density, "output_density"
   call real2reci(ngx,ngy,ngz,nspin,out_density,out_reci,i2wpad,nrplwv)
                  

!  the input charge density, "input_density"
   call real2reci(ngx,ngy,ngz,nspin,in_density,in_reci,i2wpad,nrplwv, &
                  out_density=in_density_reci)


! ------------------------------------------------------------------
! make R_n' (Rnm) 
  if (ichsto==2) then
    Rnm(1:nrplwv,1:nspin) = in_reci(1:nrplwv,1:nspin)-out_reci(1:nrplwv,1:nspin)
  endif
  
  nmax=min(ichsto-2,noldrd-1)

! after the 2nd call, we can construct the change in the residual
  if (ichsto.ge.3) then
    do ispin = 1,nspin
      dresidual(1:nrplwv,ispin)=(in_reci(1:nrplwv,ispin)-out_reci(1:nrplwv,ispin)-Rnopt_old(1:nrplwv,ispin))  &
                     *dsqrt(1.0d0+onedivGsqr(1:nrplwv)*q1sq)
    enddo
    Rnm(1:nrplwv,1:nspin) = in_reci(1:nrplwv,1:nspin)-out_reci(1:nrplwv,1:nspin)

    do ispin = 1,nspin  
      residual(1:nrplwv,ispin) = Rnm(1:nrplwv,ispin)*dsqrt(1.0d0+onedivGsqr(1:nrplwv)*q1sq)
    enddo


    if (lcoldel) then
!      Delete a column from the QR factorization of cdelr
!      allways first column
!      info=0
!      call ZDELC(cdelr,nrplwv*nspin,nrplwv*nspin,nmax,Rm, &
!              noldrd,1,dresidual,info)
!      if (info .ne. 0) then
!          write(nconso,*) 'ZDELC 1 returned info=', info
!          call uflush(nconso); call clexit(nconso)
!          return
!      end if
!      move cdelgr 
!      do i = 1,nmax-1
!       cdelpgr(1:nrplwv,1:nspin,i) = cdelpgr(1:nrplwv,1:nspin,i+1)
!      enddo
     end if ! coldel
!    insert column to QR factorization: insert dresidual
     call insert_column_qr(cdelr,nrplwv*nspin,nrplwv*nspin,nmax,Rm,noldrd,ientry,dresidual)
     cdelpgr(1:nrplwv,1:nspin,ientry) = out_reci(1:nrplwv,1:nspin)-n_opt_old(1:nrplwv,1:nspin) 

!    Augment matrix A+=[A b]=[Q,q]=[R z ; 0 p]
!    Scale the right-hand-side (residual)
     do ispin = 1,nspin
       residual(1:nrplwv,ispin)=Rnm(1:nrplwv,ispin)*dsqrt(1.d0+onedivGsqr(1:nrplwv)*q1sq)
     enddo
     call insert_column_qr(cdelr,nrplwv*nspin,nrplwv*nspin,nmax+1,Rm,noldrd,nmax+1,residual)

     tmpvec(1:nmax)=Rm(1:nmax,nmax+1)

!    Backsubstitution to get the weights
!    N = nmax, NRHS = 1 , A = Rm, LDA = noldrd, B = tmpvec, 
!    LDB = noldrd
     call ZTRTRS('U','N','N',nmax,1,Rm,noldrd,tmpvec,noldrd,info)
     if (info.ne.0) then
         write(nconso,*) 'ZTRTRS failed, info=', info
         call uflush(nconso); call clexit(nconso)
     endif

     if (idebug.gt.0) then 
     do i=1, nmax
        write(nconso,123) 'tmpvec ',i,tmpvec(i)
123     format(1x,a20,i2,f14.8,1x,f14.8)
        h = dble(tmpvec(i))
        tmpvec(i)=dcmplx(h,0.0d0)
        cwork1(i)=tmpvec(i)
     end do
         write(nconso,100) 'damden: QR weights ',nmax,ientry,(dble(tmpvec(i)),i=1,nmax),& 
                            (DZNRM2(nrplwv*nspin,cdelpgr(:,:,i),1),i=1,nmax)
100      format(1x,a30,i2,i2,10(f14.9,1x))
     endif

!    Make rho_opt = rho_n + sum[alpha_i*dn]
     call ZGEMV('N',nrplwv*nspin,nmax,zone,cdelpgr,nrplwv*nspin,  &
                tmpvec,1,zzero,n_opt,1)
     n_opt(1:nrplwv,1:nspin) = -n_opt(1:nrplwv,1:nspin) + out_reci(1:nrplwv,1:nspin)


!    Make R[n_opt] = Rn + sum[alpha_i*dR_i]  (equals cdelr[Rm*alpha])
!    First compute sum[alpha_i*dR_i] and store in Rnopt
     call ZTRMV('U','N','N',nmax,Rm,noldrd,cwork1,1)
     call ZGEMV('N',nrplwv*nspin,nmax,zone,cdelr,nrplwv*nspin,  &
                cwork1,1,zzero,Rnopt,1)
     do ispin = 1,nspin
         Rnopt(1:nrplwv,ispin) = -Rnopt(1:nrplwv,ispin)/dsqrt(1.d0+onedivGsqr(1:nrplwv)*q1sq)+Rnm(1:nrplwv,ispin) 
     enddo

!    Make rho_opt' = R[n_opt] + rho_opt ; this will be output from this call of GRPulay
     n_optm(1:nrplwv,1:nspin) = n_opt(1:nrplwv,1:nspin) + Rnopt(1:nrplwv,1:nspin)
        
     cdelpgr(1:nrplwv,1:nspin,ientry) = n_opt(1:nrplwv,1:nspin) - n_opt_old(1:nrplwv,1:nspin) 
     n_opt_old(1:nrplwv,1:nspin) = n_opt(1:nrplwv,1:nspin)

     do ispin=1,nspin
       dresidual(1:nrplwv,ispin) = (Rnopt(1:nrplwv,ispin)-Rnopt_old(1:nrplwv,ispin))*dsqrt(1.d0+onedivGsqr(1:nrplwv)*q1sq)
     enddo

!    delete the nmax+1 column
!     info = 0
!     call ZDELC(cdelr,nrplwv*nspin,nrplwv*nspin,nmax+1,Rm, &
!               noldrd,nmax+1,dresidual,info)                    
!     if (info.ne.0) then 
!       write(nconso,*) 'GRPulay : ZDELC 2 info not zero ', info 
!       call uflush(nconso);  call clexit(nconso)          
!     endif

!    Delete column before inserting a new 
!    if (lcoldel) then 
!      info = 0
!      call ZDELC(cdelr,nrplwv*nspin,nrplwv*nspin,nmax,Rm, &
!              noldrd,ientry,dresidual,info)
!       if (info.ne.0) then 
!        write(nconso,*) 'GRPulay : ZDELC 3 info not zero ', info 
!        call uflush(nconso)
!        call clexit(nconso)          
!       endif
!    endif
     call insert_column_qr(cdelr,nrplwv*nspin,nrplwv*nspin,nmax,Rm,noldrd,ientry,dresidual)
     Rnopt_old(1:nrplwv,1:nspin)     = Rnopt(1:nrplwv,1:nspin)

     ientry=ientry+1
     if (ientry.gt.noldrd-1) then
        ientry=ientry-(noldrd-1)
        lcoldel=.true.
        ientry = noldrd-1
     end if

     do ispin=1,nspin
       dresidual(1:nrplwv,ispin) = (Rnopt(1:nrplwv,ispin))*dsqrt(1.d0+onedivGsqr(1:nrplwv)*q1sq)
     enddo

  endif  ! if ichsto >= 3

! write out the size of the residual
  damconv=0.0d0
  if (ichsto.ge.2) then
     do ispin=1,nspin
        A00(ispin)=0.0d0
        do m=1,nrplwv
          A00(ispin)=A00(ispin)+(Rnm(m,ispin)*conjg(Rnm(m,ispin))) &
                      *(1.d0+onedivGsqr(m)*q1sq)
        enddo            
        damconv = damconv + A00(ispin)**2
     enddo
     damconv = dsqrt(damconv)
     write(nconso,3190) nmax,(A00(ispin),ispin=1,nspin)
3190 format(1x,'DAM:',i4,2f15.6)
  endif

! Perform the actual charge density mixing
! p_{k+1}=p_{k} + G*r_{k} + sum[alpha_i*(dp_i + Gdr_i)]
  if (ichsto.ge.2) then
    do ispin=1,nspin
      do nr=1,nrplwv
        m=i2wpad(nr)
        if (ichsto.ge.3) then 
          in_density_reci(m,ispin) = n_optm(nr,ispin)
        else 
          in_density_reci(m,ispin) = out_reci(nr,ispin) + denmix(nr)*(in_reci(nr,ispin)-out_reci(nr,ispin))
        endif
      end do

!     Fourier transform the charge density back to real space
      call fft3d(in_density_reci(1,ispin),ngx,ngy,ngz,1)
      out_density(1:nplwv,ispin)=dble(in_density_reci(1:nplwv,ispin))

    end do
    if (ichsto.eq.2) then
      n_opt_old(1:nrplwv,1:nspin) = out_reci(1:nrplwv,1:nspin) ! + 0.1*(in_reci(:,:)-out_reci(:,:))
      Rnopt_old(1:nrplwv,1:nspin) = Rnm(1:nrplwv,1:nspin)
    endif
  else 
     out_density(1:nplwv,1:nspin) = in_density(1:nplwv,1:nspin)
  endif
  ichsto=ichsto+1
 
 end subroutine GRPulay

!=======================================================================
 subroutine density_mix_init(ngdens_max,ngx,ngy,ngz,nspin,   &
                             lpctx,lpcty,lpctz,dirdat,g2max,recc,ipwpadG, & 
                             lmastr)
!
! init routine, should be called before first call of Pulay
!
 use netcdfinterface
 use run_context
#ifdef PARAL
 use par_functions_module
#endif
 implicit none
!=====================================================================
  integer,intent(in)  :: ngdens_max,ngx,ngy,ngz,nspin
  integer,intent(in)  :: lpctx(ngx),lpcty(ngy),lpctz(ngz)
  real*8, intent(in)  :: dirdat(:),g2max,recc(3,3)
  integer,intent(in)  :: ipwpadG(0:3,ngdens_max)
  logical*4,intent(in):: lmastr
!---------------------------------------------------------------------
! locals
  real*8             :: dirmin,dirmax
  integer            :: m,nx,ny,nz,status,ncid,nOK
  real*8             :: gzx,gzy,gzz,gyx,gyy,gyz,gxx,gxy,gxz,gx,gy,gz,g2,amp 
  real*8             :: hsqdtm,g2max_density_mix
  real*8, allocatable:: tmpdirdat(:) 
  real*8             :: denmixA,maxamp 
  character*20       :: method
  parameter(hsqdtm=3.810033d0)      
  logical,save       :: init = .false.
  logical*4          :: kerker
  character*999 :: keyword      ! char buffer for scanning input file items
#include "ms.h"

  ientry=1
  ichsto=1     
  number_mixing_iterations = 1

  if (init) return ! only need to set ientry and ichsto

  write(nconso,*) 

  allocate(ipwpadG_densmix(0:3,ngdens_max))
  ipwpadG_densmix    = ipwpadG
  ngdens_max_densmix = ngdens_max 

!=======================================================================          
! Set noldrd (charge mixing history)    - default = 10       
! Set denmix (Pulay mixing coefficient) - default = 0.1
!=======================================================================
  if (lmastr) then 

  status =  nf_open(netCDF_input_filename, NF_NOWRITE, ncid)
  if (status /= nf_noerr) call abort_calc(nconso, &
                "density_mixing -> nf_open : error opening nc-file")                 

! ----------------------------------------------------------
! ChargeMixing%UpdateCharge
! ----------------------------------------------------------
  keyword = ""         ! clear buffer
  status = nfget(ncid, "ChargeMixing%UpdateCharge", keyword)
  if ((status == nfif_OK).and.(index(keyword,"No")  > 0)) then
       lnonselfconsistent = .true.                            ! for Harris type calculations
       write(nconso,400) "Performing a non self-consistent calculation (no charge update)"
       init = .true.
       return 
  else
       lnonselfconsistent = .false.                           ! default (self-consistent calculation)
       write(nconso,400) "Updating charge using Pulay mixing"
  endif
400  format(1x,'density_mixing: ',a)

  ! get method Pulay or GRPulay
  method = "" 
  status = nfget(ncid, "ChargeMixing%Method", method)
  if (status /= nfif_OK ) then
    density_mixing_method = 'Pulay'  ! default
    write(nconso,400) 'ChargeMixing method : using default method'
  else 
    if (method=='Pulay') then 
        density_mixing_method = 'Pulay'
    elseif (method=='GRPulay') then 
        density_mixing_method = 'GRPulay'
    else
      write(nconso,400) 'density_mixing: method not known: ',method  
      call abort_calc(nconso, "density_mixing -> unknown method")                 
    endif 
  endif 
  write(nconso,400) 'ChargeMixing method : ', density_mixing_method


  ! read max number of densities to be used (noldrd)
  status = nfget(ncid, "ChargeMixing%Pulay_MixingHistory", noldrd)
  if (status /= nfif_OK ) then
    noldrd = 10  ! default
  elseif (noldrd == nf_fill_int) then
    noldrd = 10  ! default
  endif 
  write(nconso,421) 'noldrd = ', noldrd,'(charge mixing history)'

  ! read density mixing coefficient (denmix) 
  status=nfget(ncid,"ChargeMixing%Pulay_DensityMixingCoeff",denmixA)
  if (status /= nfif_OK)  denmixA = 0.1         ! set default
  write(nconso,420) 'denmix = ', denmixA,'(Pulay mixing coefficient)'

  ! read number of harris iterations to be used before starting mixing the density
  status = nfget(ncid, "ChargeMixing%NumberHarrisIterations",number_harris_iterations)
  if (status /= nfif_OK)  number_harris_iterations=0      ! set default
  if (number_harris_iterations>0) then  
    write(nconso,421) 'Number of Harris iterations before start mixing the density = ', number_harris_iterations
    lnonselfconsistent = .true.
  endif

  ! read gvector_factor, using this one can contron how many gvectors should be included in the mixing 
  status = nfget(ncid, "ChargeMixing%GvectorFactor",gvector_factor)
  if (status /= nfif_OK)  gvector_factor=1.0d0     ! set default
  write(nconso,420) 'GvectorFactor', gvector_factor

!  check if we should use simple mixing using denmixA (read above) or Kerker mixing
   status=nfget(ncid,"ChargeMixing%Pulay_KerkerPrecondition",keyword)
   if (status == nfif_OK) then
      if (index(keyword,"Yes") > 0) then
        write(nconso,420) 'Using Kerker preconditioning'
        kerker = .true. 
      else
        kerker = .false.
        write(nconso,420) 'Using linear mixing (No Kerker preconditioning)'
      endif
   else
      kerker = .false.
      write(nconso,420) 'Using linear mixing (No Kerker preconditioning)'
   endif

   status = nf_close(ncid)

   endif ! lmastr

#ifdef PARAL
!  send noldrd,denmix,lnonselfconsistent,denmixA,kerker
   call mspack_integer_scalar (nconso,ANY, MSG_SETUP,&
                   INTEGER4, noldrd,  1, nOK)
   call mspack_double_scalar (nconso,ANY,MSG_SETUP,&
                   REAL8,  denmixA,  1, nOK)
   call mspack_logical_scalar (nconso, ANY, MSG_SETUP,&
                   LOGICAL4, lnonselfconsistent, 1, nOK)
   call mspack_logical_scalar (nconso, ANY, MSG_SETUP,&
                   LOGICAL4, kerker, 1, nOK)
#endif

   density_mixing_method = 'Pulay'

420  format(1x,'density_mixing: ',a,1x,f12.6,1x,a)
421  format(1x,'density_mixing: ',a,1x,i3,1x,a)
                                                                                          
!=======================================================================          
! find out how many states below the cut off for k=0
! define nrplwv and i2wpad
!=======================================================================          
   allocate(i2wpad(ngdens_max))
   allocate(tmpdirdat(ngdens_max))
   dirmin=1000.0d0
   dirmax=0.0d0
   nrplwv=0
   m = 1
   do nz=1,ngz
      gzx=recc(3,1)*lpctz(nz)
      gzy=recc(3,2)*lpctz(nz)
      gzz=recc(3,3)*lpctz(nz)
      do ny=1,ngy
         gyx=recc(2,1)*lpcty(ny)
         gyy=recc(2,2)*lpcty(ny)
         gyz=recc(2,3)*lpcty(ny)
         do nx=1,ngx
            gxx=recc(1,1)*lpctx(nx)
            gxy=recc(1,2)*lpctx(nx)
            gxz=recc(1,3)*lpctx(nx)
            gx=gxx+gyx+gzx
            gy=gxy+gyy+gzy
            gz=gxz+gyz+gzz
            g2=hsqdtm*(gx**2+gy**2+gz**2)
            g2max_density_mix = gvector_factor*g2max
            if ((g2.le.g2max).and.(g2.gt.0.00001d0)) then
                 nrplwv = nrplwv + 1
                 i2wpad(nrplwv)=m
                 if (g2.gt.0.00001d0) then 
                   tmpdirdat(nrplwv) = dirdat(m)
                   dirmin=min(dirmin,dirdat(m))
                   dirmax=max(dirmax,dirdat(m))
                 else
                   ! G=0  
                   tmpdirdat(nrplwv) = 1.0d0
                 endif
             
            endif
            m = m + 1
         enddo
      enddo     
   enddo  
!  The weighting factor (G^2 + Q1^2)/Q^2 ) is chosen so that the shortest
!  wavevector is weighted 20 times as strong as the longest. Max amplification
!  is however given by Gmax^2/Gmin^2, so
   amp = min(20.0d0,(dirmax/dirmin)-1.0d0)
   q1sq=(amp-1.0d0)/(dirmax-amp*dirmin)

!  allocate arrays over nrplwv,nspin and noldrd
   allocate(cdelr(nrplwv,nspin,noldrd))
   allocate(cdelp(nrplwv*nspin,noldrd))
   allocate(cdelpgr(nrplwv,nspin,noldrd))
   allocate(Rm(noldrd,noldrd))
   allocate(denmix(nrplwv))

   allocate(onedivGsqr(nrplwv))
   onedivGsqr(1:nrplwv) = tmpdirdat(1:nrplwv)
   deallocate(tmpdirdat)


!  check if we should use simple mixing using denmixA (read above) or Kerker mixing
   if (kerker) then 
       write(nconso,420) 'Using Kerker preconditioning'
       denmix(1:nrplwv) = 1.0d0/dsqrt(1.0d0 + onedivGsqr(1:nrplwv)*q1sq) * denmixA
   else
        denmix(1:nrplwv) = denmixA         ! set default to simple mixing
        write(nconso,420) 'Using linear mixing (No Kerker preconditioning)'
   endif

   write(nconso,1025) nrplwv,dirmin,dirmax,q1sq
   1025 format(1x,'DAM ',i8,3f15.6)
   call uflush(nconso)

   init = .true.

 end subroutine density_mix_init


!=========================================================================
 subroutine real2reci(ngx,ngy,ngz,nspin,real_density,reci_density, & 
                      i2wpad,nrplwv,out_density )
 implicit none
 integer, intent(in)         :: ngx,ngy,ngz,nspin
 real*8, intent(in)          :: real_density(:,:)  ! nplwv,nspin
 complex*16, intent(out)     :: reci_density(:,:)  ! nrplwv,nspin
 integer, intent(in)         :: i2wpad(:),nrplwv 
 complex*16,optional,target  :: out_density(:,:)   ! nplwv,nspin
! locals 
 real*8                         :: factor
 integer                        :: m,nplwv,ispin 
 complex*16, allocatable,target :: cwork(:,:)       ! nplwv
 complex*16, pointer            :: fftdens(:,:)
 complex*16                     :: sum 
!========================================================================= 

  nplwv = ngx*ngy*ngz
  factor=1.0d0/dble(nplwv)

  if (present(out_density)) then 
    fftdens => out_density
  else
    allocate(cwork(nplwv,nspin))
    fftdens => cwork
  endif

  do ispin = 1,nspin
    fftdens(1:nplwv,ispin)=real_density(1:nplwv,ispin)*factor
    call fft3d(fftdens(:,ispin),ngx,ngy,ngz,-1) 
    do m=1,nrplwv
      reci_density(m,ispin)=fftdens(i2wpad(m),ispin)
    enddo 
  enddo

  ! if (present(out_density)) then 
  !   out_density(:,:) = 0.0d0
  !   do ispin = 1,nspin
  !     do m=1,ngdens_max_densmix
  !       out_density(ipwpadG_densmix(0,m),ispin)=fftdens(ipwpadG_densmix(0,m),ispin)
  !     enddo                                                         
  !   enddo
  ! endif

  if (allocated(cwork)) deallocate(cwork)
  

end subroutine real2reci


!=========================================================================
 subroutine insert_column_qr(cdelr,LDQ,M,nmax,Rm,noldrd,ientry,dresidual)            
!     Insert a new column into QR factorization of A = cdelr*Rm
!     Insert between ientry-1:ientry of A:=cdelr*Rm, 
!     1<=ientry<=nmax
!     cdelr(ldq,nmax) 
!     Rm(noldrd,nmax) : (nmax-1)*(nmax-1)

!     LDQ = nrplwv*nspin, M = nrplwv*nspin
!=========================================================================
  use run_context
  implicit none
  complex*16           :: cdelr(:,:,:)   !  nrplwv,nspin,noldrd)
  integer              :: LDQ,M,nmax
  complex*16           :: Rm(:,:)        !  noldrd,noldrd
  integer              :: noldrd,ientry
  complex*16           :: dresidual(:,:) !  nrplwv,nspin

! locals
  real*8             :: rcond
  complex*16         :: cwork(2*ldq+2*noldrd+1)   ! automatic work array
  integer            :: info
  real*8             :: DZNRM2          
  external DZNRM2

  if (nmax.eq.1) then 
      ! initialize the QR factorization
      Rm(1,1)=dcmplx(DZNRM2(M,dresidual,1),0.0d0)
      call ZCOPY(M,dresidual,1,cdelr,1)
      call ZSCAL(M,1.0d0/Rm(1,1),cdelr,1)                           
  else

    ! rcond=1.0d-6
    rcond=0
    call ZINSC(cdelr,ldq,M,nmax,Rm,noldrd,ientry,dresidual,rcond,cwork,info)
    write(nconso,*) 'rcond = ', rcond,info
    if (info .ne. 0) then
      write(nconso,*) 'ZINSC returned info=', info
      call uflush(nconso)
      call clexit(nconso)
      return
    end if
  endif

 end subroutine insert_column_qr

end module density_mixing_module

