#include "definitions.h"

      subroutine libtos(iscxc, & 
! Array dimensions and constants/flags
     &     ngx,ngy,ngz,ngxs,ngys,ngzs,nions,nspec,nbands,nrplwv, & 
     &     nsymax, & 
     &     nrplwv_global, &  
     &     nspin,npwxc, nkpnum,nkprun, & 
     &     idebug,nplwv,nkpmem,npspts,mmaxx, & 
!          arrays initialized in dacapo.F & 
     &     ipwpad,dnlg,dnlkg,pwkine, & 
     &     numsym,nmstyp,dirnms,nmsfft,nmsfor, & 
     &     nkpibz,nkpunf,lkpinv, & 
     &     nplwkp,kspin,lpctx,lpcty,lpctz, & 
     &     vkpt,wtkpt,gkp123,rmove,rvelo,posion,nionsp,atomic_number, & 
     &     recc,reci,dirc,diri,ecut,ndcbyt,ldonkp, & 
     &     sphericalcnstr,icoordsystem & 
#ifdef PARAL
            ,lmastr,kphost,&
#include    PARAL_ARGS
#endif PARAL
           ,wffile,magic)

      use van_us_data_module
      use ewald
      use update_netcdf_history_module
      use run_context
      use us_hpsi_module
      use init_wave_function
      use module_rmm_diss, only : rmm_init
      use blochlmodule
      use Madelung_module
      use density_mixing_module 
      use stress_module
      use multicenter_projection_module
#ifdef PARAL
      use par_functions_module
#endif PARAL

      implicit none

      integer iscxc
      integer ngx,ngy,ngz,ngxs,ngys,ngzs,nions,nspec,nbands
      integer nrplwv,nsymax
      integer nrplwv_global
      integer nspin,npwxc, nkpnum, nkprun
      integer idebug,nplwv,nkpmem,npspts,mmaxx

      integer ipwpad(nrplwv_global,nkprun)
      real*8  dnlg  (nrplwv_global,3,nkprun)
      real*8  dnlkg (nrplwv_global,0:3,nkprun)   
      real*8  pwkine(nrplwv_global,nkprun)     
     
      integer numsym,nmstyp(*)
      real*8 dirnms(3,3,*)
! nmsfft(r,ns): symmetry operation indices
      integer nmsfft(nplwv,*)
      integer nmsfor(nions,*)
! nkpibz: The k-point within the IBZ that represent this k-point
      integer nkpibz(nkpnum)
! nkpunf: The point group operation, that brings this k-point into the IBZ
      integer nkpunf(3,3,nkpnum)
! lkpinv: True if the nkpunf has been added the inversion without this
!         being present in the space group
      logical*4 lkpinv(nkpnum)
! gkp123: Coordinates of the k-point in G1,G2,G3-units
      real*8 gkp123(3,nkpnum)
      real*8 vkpt  (3,nkprun)
      real*8 wtkpt   (nkprun)

      integer nplwkp(nkprun)
      integer kspin(nkprun)

      integer lpctx(ngx),lpcty(ngy),lpctz(ngz)
      real*8 posion(3,nions,nspec)
      real*8 rmove (3,nions)     
      real*8 rvelo (3,nions)

      integer nionsp(nspec)
      integer atomic_number(nspec)

!     spherical atoms in ionmov 
      integer sphericalcnstr(2),icoordsystem

      real*8 dirc(3,3),recc(3,3),diri(3,3),reci(3,3)

      character*(*) wffile

      logical*4 ldonkp(nkprun)
      integer  ndcbyt,magic


#ifdef PARAL
      integer kphost(nkprun),wfdim
#include PARAL_DECL
#endif PARAL


      integer icharg(nspec)
      integer nlnum(nspec)
      integer mmax(nspec)
      integer icharc(nions)

      integer ibacwf
      integer noffs0(nkprun)

      real*8 volc,voli
      real*8 wfkine(nbands,nkprun)
! rdensr(r): real space charge density
      real*8, allocatable  :: rdensr(:,:)
! dirdat(g): 1/g**2 array
      real*8, allocatable  :: dirdat(:)
! rdnsum(.): work array
      real*8, allocatable  ::  rdnsum(:,:)
      real*8, allocatable  ::  vxc(:,:)
      real*8, allocatable  ::  dnlg0(:,:)

      real*8 sizham  (nkprun)
      real*8 deriv   (nkprun)

      real*8 eigen(nbands,nkprun)
      real*8 eigold(nbands,nkprun)

      real*8 rmass (nspec)
      real*8 rdamp (nspec)
      real*8 pscore(nspec)
      real*8 psgmax(nspec)
      real*8 rlog  (nspec)
      real*8 psp(npspts,nspec)
      real*8 pscale(0:2,nspec)

!     rho_rad : fourier transform of partialcore charge
      real*8 rho_rad(npspts,nspec)
!     rho_core: partialcore charge on realspace grid
      real*8, allocatable  ::  rho_core(:)

      real*8 phir2v(mmaxx)
      real*8 radius(mmaxx,nspec)
      real*8 phiatm(mmaxx,0:2,nspec)
      real*8 vion  (mmaxx,0:2)

      real*8 forwrk(3,nions)
      real*8 eifor(3,nions,nspec)
      real*8 ewifor(3,nions,nspec)
      real*8 fnleif(3,nions,nspec)
      real*8 fortot(3,nions,nspec)
      real*8 ffield(3,nions,nspec)
      real*8 dirion(3,nions*nspec)
      real*8 dipion(3,nions*nspec)

      real*8 occ   (nbands,nkprun)
      real*8 vnl   (nbands,nkprun)

      real*8 vext  (ngz)

! cvion(r):  real space local ionic potential
      complex*16, allocatable :: cvion(:)
! cveff(r):     real space Kohn-Sham (effective one-electron)-potential
      complex*16, allocatable :: cveff(:,:)
      complex*16, allocatable :: cveff_soft(:,:)
! cvhartree(r):     real space hartree-potential
      complex*16, allocatable :: cvhartree(:,:)
      complex*16, allocatable :: deltav(:)

      logical*4 lkpnew(nkprun)
      logical*4 redmem
      logical*4 lstowf
      logical*4 ldelin
      logical*4 lcharged
      integer   ieigsolver

      character*64 wfsetu

! Arrays of system independent dimensions
      integer nxc
      parameter (nxc=2000)
      real*8 excdat(nxc),xcfdat(nxc),xcpdat(nxc)
      real*8 timer(MAXTIMERS)

      real*8 avebox(3,4)

! Local arrays

! plane wave coefficients
      complex*WF_PRECISION, allocatable :: cptwfp(:,:,:)

!     work array for use in call of calcxc and included in H_TIMES_B_ARGS
      complex*16, allocatable :: cwork1(:)

! Local variables

!  variables belonging to the analysis part of libtos

      integer nener ! defined in setuop 
      integer ncut 
      parameter(ncut=2)
      real*8, allocatable ::  adostmp(:,:,:,:,:) ! nener,9,nions,ncut,nspin
      real*8, allocatable ::  aoldos(:,:,:,:,:)  ! nener,9,nions,ncut,nspin

      real*8 overls  (nions,ncut,nkprun)
      real*8 overlp(3,nions,ncut,nkprun)
      real*8 overld(5,nions,ncut,nkprun)

      real*8  vnlband 
      real*8, allocatable :: work1(:)
      complex*16 cdum, csum
      real*8 tolerance, resmax
      real*8 dummy, wkintmp
      real*8 enpot,  enkin, entot,enewa,envnl,enxc,enxcc,envion
      real*8 enxcpbe,enxcrevpbe,enxcrpbe, enraw
      real*8 ukfactor,uk0,wtkpt1(nkprun)
      real*8, allocatable :: entot_all_xc(:),exc_and_corr_all(:,:)
      real*8 enhac, enfdum, efedum, etota1, etota2, enxcgg, eneig
      real*8 enxcpz,enxvwn,exdum, ediff
      real*8 enion, enxcln, efermi, etota0
      real*8 enfrec, entrpy, enacc1, enemin, extpot, enemax
      real*8 encons, endipc, enalp
      real*8 ocvar, ocvold, omix, occmix
      real*8 stmcen,stmwid, wangul,t,forvar,forsha,forall,sizeforce
      real*8 width
      real*8 zdip, deltim, uppcpu
      real*8 dip0, extfie, dipmix
      logical*4 lworkp
      logical*4 langul
#include "etime.h"
      real*8 thisit,previt
      logical*4 lstop,lionmv,lfermi,lmastr
      real*8  nelect
      integer nionch,  nichsq,  nextend
      integer idipol,iwait,iion
      integer numovl,  nxytot
      integer nitend, niterm, nbspas, nkspas, ispati
      integer nsp,  niter,  ndiapb
      integer ifactor, smethod
      real*8 fallmax,fvarmax,fchgmax,damconv
      real*8 energy_convergence,density_convergence
      real*8 occupation_convergence
      integer inudged,repeated_convergences,concrit_one,concrit_two
      integer i, j, n, nn, m, mu, nat, nkp, nb, nkpeff, nnkp,icheck
      real*8  detot,alroot,temp1,temp2,enhaq,rinplw
      real*8  ex,ex2(2)
      real*8  ensemble_energies(5)
      integer k
      logical*4 lharri
      logical*4 luns2s
      logical*4 ldenrd
      real*8 g2max
      integer nmov
      integer nkpdum
      integer iversi,mplwv,mspin
      integer nplwk0
      logical*4 lspsi
      logical*4 lmagmo,lxcpot,foundallwf,found
      real*8    rmagmo
      real*8    ecut,rcutoff
      integer   ireset
      integer   ispin,offset,myrank,irank,ni,natom
      character*4 txttot,txtdft
      real*8 sumcvion

! lupdch,ki,nl true: chdop should update: charge dens.,kin. and nl-terms
! lupdsu       true: update kin and nl sums
! lupdio       true: update local ionic potential
! lupdks       true: update effective (local) one-electron potential
! lupdew       true: update ewald (ion-ion) energy

      logical*4 lupdch,lupdki,lupdnl,lupdsu,lupdio,lupdks,lupdew
      logical*4 charge_density_initialized
      real*8   vmadl(nions,nions),ew_force(3,nions)

      data lstop,lionmv/.false.,.true./
      data lupdch,lupdki,lupdnl/.true.,.true.,.true./
      data lupdsu,lupdio,lupdks/.true.,.true.,.true./
      data lupdew              /.true./
      data nmov /0/

      concrit_one = 0
      concrit_two = 0

! =========================================================
! Inactive variables : occmix (not initialized in setuop any more)
! =========================================================


! =========================================================
!     allocate global arrays for wave-function, densities and 
!     potentials, plus some work arrays 

      allocate(rdensr(nplwv,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'rdensr') 

      allocate(dirdat(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'dirdat') 

      allocate( rdnsum(nplwv,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'rdnsum ') 

      allocate(vxc(nplwv,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'vxc') 

      allocate( dnlg0(nplwv,3),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'dnlg0 ') 

      allocate(rho_core(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'rho_core ') 

      allocate( cvion(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,' cvion ') 

      allocate(cveff(nplwv,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'cveff ') 

      allocate(cveff_soft(ngxs*ngys*ngzs,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'cveff_soft ') 

      allocate(cvhartree(nplwv,nspin),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'cvhartree ')

      allocate(deltav(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'deltav ')
      deltav(:)=0.d0

      allocate(cwork1(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'cwork1 ') 

      allocate(work1(nplwv),STAT=icheck)
      call libtos_allocate_error(nconso,icheck,'work1 ') 

! =========================================================
!     allocate room for non-selfconsistent total energies and
!     for exchange and correlation energies
      allocate(entot_all_xc(6),exc_and_corr_all(6,2))
! =========================================================

! =========================================================
!     allocate module data for van_us_data_module 
      call allocate_van_us_data(nplwv,nrplwv_global,nions, &
            nspec,nbands,nkprun )
! =========================================================

#ifdef SERIAL
!     (The serial code and the parallel master task do similar things)
      lmastr = .TRUE.
#endif SERIAL
!     Wavefunction setup file:
      wfsetu = 'wf_setup'

! Reset all timers
      timer = 0.0d0
      call uttime(time)

      if (magic .ne. 12345678) then
        write(nconso,*) 'wrong magic value = ', magic
        write(nconso,*) 'probably the argument list is bad'
        call clexit (nconso)
      endif

!     Initialize
      etota0=0.0d0
      etota1=0.0d0
      enacc1=0.0020d0
      enfrec=0.0d0
      entrpy=0.0d0
      efermi=0.0d0
      lfermi=.true.
      eneig=0.0d0
      ediff=1.0d10
      resmax = -1.0d10
      tolerance = 1.0d-4
      ieigsolver = 2  ! eigsolver as default
      ldenrd = .false.
      damconv = 0.0d0
      envnl = 0.0d0
      enion = 0.0d0
      enkin = 0.0d0
      efedum = 0.0d0
      enfdum = 0.0d0
      sizeforce = 100.0d0
      iwait = 3
      eifor(:,:,:) = 0.0d0
      ffield(:,:,:) = 0.0d0
      fortot(:,:,:)= 0.0d0
      forsha = 0.0d0 
      forvar = 0.0d0
      occ(:,:) = 0.0d0
      becp(:,:,:) = 0.0d0
      vnl         = 0.0d0


!     find volume
      call bastr(dirc,recc,volc)
      call bastr(diri,reci,voli)            

      if (lmastr) then

! ---- Read in setup values ----

       call setuop(&
       nrplwv,nrplwv_global,nkprun,nbands,ngx,ngy,ngz,nplwv,&
       nions,nspin,nspec,nionsp, volc,&
       nlnum,mmax,mmaxx,npspts,rlog,radius,phiatm,rho_rad,&
       pscore, psgmax, psp, &
       vext, idipol, zdip,dipmix,dip0, extfie, extpot, lworkp,&
       width,smethod,enemin,enemax,nener,rcutoff, &
       stmcen,stmwid,&
       iion,inudged,fallmax,fvarmax,fchgmax,energy_convergence, & 
       density_convergence,occupation_convergence,repeated_convergences,&
       rmass,rdamp,noffs0,rmagmo,lspsi,ieigsolver,lcharged,ireset,&
       icharc, icharg,  nelect, deltim,atomic_number,uppcpu,&
       ispati,nbspas,nkspas,langul,&
       wangul,avebox,ibacwf,lstowf,ldelin,&
       niter,ndiapb)                           

      endif

      write(nconso,1858) WF_PRECISION
      call uflush(nconso)
1858  format(' WFG: wavefunction array type = complex*', i2)

#ifdef PARAL

!     Multicast setuop data from master to all slaves
      call mstuop (nconso, kphost, nkprun,&
            nlnum,mmax,rlog,radius,phiatm,vion,phir2v,&
            mmaxx,&
            dirc, diri, vext,ffield,&
            ecut, newpts,&
            idipol, icharc, icharg, &
            lpctx,&
            lpcty, lpctz, nelect,nionch,nichsq,&
            ngx,ngy,ngz,nionsp,nions,&
            nmsfft, nplwv, npspts, nspec,nsymax, numsym,nmstyp,&
            posion, pscale, pscore,psgmax, psp, rho_rad,&
            recc, reci,&
            vkpt, volc, width, wtkpt,&
            occmix,&
            gkp123,smethod,nkpibz,nkpunf,lkpinv,nkpnum,&
            zdip,stmcen,stmwid,&
            ispati,dip0,extfie,dipmix,nbspas,nkspas,lworkp,langul,&
            wangul,extpot,enemin,enemax,nener,rcutoff,avebox,kspin,niter,ndiapb,&
            ireset, lspsi, ieigsolver,&
            nrplwv,nrplwv_global,nbands,&
            output_wavefunction_to_netcdf,&
            output_chargedensity_to_netcdf,&
            output_totalstress_to_netcdf,&
            output_effpotential_to_netcdf,&
            output_elspotential_to_netcdf)
 
      if (.not. lmastr) then
!       The slaves do not backup or store wavefunctions
        ibacwf = 0
        lstowf = .false.
      endif
#endif PARAL


! calculate the contribution to the total energy from the non-coulomb
! part of the g=0 component of the pseudopotentials and the force on the
! unit cell due to the change in this energy as the size of the cell
! changes

      enalp=0.0d0
      do  800 nsp=1,nspec
        enalp=enalp+pscore(nsp)*nionsp(nsp)
  800 continue
      enalp=enalp*nelect/volc
      endipc=0.0d0
      encons=enalp+endipc

! initialize velocities (k-point dependent, lifted up from setuop) /OHN

      do 4110 nkp=1,nkprun
!       Set k-point status to "new" (i.e., uninitialized)
        lkpnew(nkp)=.true.
!       serial code must do all k-points
        do 4100 nb=1,nbands
          eigen(nb,nkp)=1000.0d0
          wfkine(nb,nkp)=0.0d0
 4100   continue
 4110 continue

!     setup look-up tables for ex-corr energy and force

      call xcdat(excdat, xcfdat, xcpdat, nxc)

! setup dirdat

      call inidir(ngx,ngy,ngz,nplwv,recc,volc,lpctx,lpcty,lpctz,dirdat)

!     setup parameters for charge density mixing
      g2max=ecut
      call density_mix_init(ngdens_max,ngx,ngy,ngz,nspin,&
                            lpctx,lpcty,lpctz,dirdat,g2max,recc,&
                            ipwpadG,lmastr)             


      charge_density_initialized=.false.
!     try reading a density from netcdf file 
      call density_get(rdensr,ngx,ngy,ngz,nspin,found,lmastr) 
      if (found) charge_density_initialized=.true.  

#ifdef PARAL
!     Send flag charge_density_initialized from master to all slaves
      call msflag (nconso, charge_density_initialized, lupdch, &
                   lupdio, lupdki, lupdnl,lionmv)
#endif PARAL

! ------------------------------------------------------------------------
! Setup the initial wavefunctions

!       allocate memory for wavefunctions 
        allocate(cptwfp(nrplwv,nbands,nkpmem),STAT=icheck)
        call libtos_allocate_error(nconso,icheck,'cptwfp')

        foundallwf = .true.
        do nkp =1,nkprun

          call geteff(nkp,nkpmem,nconso,nkpeff)
          if (foundallwf) then
              call getwf_netcdf(nkp,nbands,nrplwv,nrplwv_global,&
               nplwkp(nkp),nkprun,nspin,lkpnew(nkp),kspin,ndcbyt,nkpmem,&
               cptwfp,ldonkp,lmastr&
#ifdef PARAL
              ,kphost,&
#              include PARAL_ARGS
#endif PARAL
              ,timer)                

              write(nconso,*) 'lkpnew = ',lkpnew(nkp)
              if (lkpnew(nkp)) foundallwf = .false.
          endif ! if foundall

        enddo   ! nkp = 1,nkprun

        if (foundallwf) then 
!         read density
          call density_get(rdensr,ngx,ngy,ngz,nspin,found,lmastr) 
!         at present ChargeDensity must be present then 
!         wavefunction are present 
          if (.not.found) then 
           call abort_calc(nconso,'Could not read ChargeDensity')
          endif
        endif

!       If not all wavefunctions found initialize from scratch
!       and if parallel do broadcast to all nodes
        if (.not.foundallwf) then

!        Calculate the local potential and the partialcore
         call potion(ngx,ngy,ngz,nions,nionsp,nspec,nplwv,posion,&
          lpctx,lpcty,lpctz,npspts,recc,volc,psp,psgmax,icharg,cvion)
         call extadd(ngx,ngy,ngz,cvion,vext,1.0d0)

!        get partialcore charge on realspace grid
         call partialcore(ngx,ngy,ngz,nions,nionsp,nspec,nplwv,posion,&
          lpctx,lpcty,lpctz,npspts,recc,volc,rho_rad,psgmax,rho_core,&
           mmax,cwork1 )

        do nkp =1,nkprun
           if (lgenpp) then
!             calc vkb for all k-points
               if (ldonkp(nkp)) then
!                Calculate the beta(G) (vkb) for this k-point
                 call cal_vkb (nplwv,nrplwv,nrplwv_global,        &
                    nplwkp(nkp),nkp,nions, nspec, nionsp,nkpmem,  &
                    nbands,nkprun,dnlg(1,1,nkp),posion,dirc,volc, &
                    ngxs,ngys,ngzs,ecut,idebug,recc               &
#ifdef PARAL
                    , & 
#include PARAL_ARGS
#endif
                    )
               endif
           endif

           if (ldonkp(nkp)) then
            call geteff(nkp,nkpmem,nconso,nkpeff)
!           get initial wavefunctions from LCAO method and write them to disk
            call wfinit(ndcbyt,recc,&
              nplwkp(nkp),&
              nelect,occ,&
              mmaxx,nlnum,mmax,rlog,radius,rdensr, &
              charge_density_initialized,&
              phiatm,phir2v,wtkpt,width,eigen,lmastr,&
              cptwfp,&
#             include "potential_args.h"
#             include "apply_h_args.h"
              ,timer,nconso  )

           endif
           lkpnew(nkp) = .false. 
         enddo  ! nkp


        endif  ! found all

!      initial set of wavefunctions has now been written to disk. 
!      or is all present in the cptwfp if nkpmem=nkprun 

#ifdef PARAL 
!      check that eigsolve is used if par_pw_np > 1 
       if ((par_pw_np.gt.1).and.(ieigsolver<2)) then 
         write(nconso,*)  'PAR:  Using eigsolve (par_pw_np>1)'
         ieigsolver = 2
       endif
#endif

! Remember the setup time
      call uttime(time)
      timer(TSETUP)=timer(TSETUP)+time(1)


      thisit = 0.0d0

! -----------------------------------------------------------------------
! this is the main loop of the program during which one complete step of
! the electron dynamics is performed
! -----------------------------------------------------------------------

      do 1000 n=1,niter


! Keep track of how long time an iteration takes
         call uttime(time)
         previt=thisit
         thisit=-time(1)
         if (.not. lmastr) goto 2090

! See if program-execution is requested to stop 
! 0 : stop after this iteration without analysis 
! 1 : stop after this iteration with the analysis 
! 2 : turn of the ionic dynamic (ions = 0) 

         if (n.eq.1) then
            nitend=niter
            open(7,FILE=stop_file_name)
            rewind 7
            write(7,*) ' '
            close(7)
         else if (.not. lstop) then
            open(7,FILE=stop_file_name)
            rewind 7
            read(7,*,err=998,end=998) niterm
            rewind 7
            write(7,*) ' '
            if (niterm.eq.0) then 
              write(nconso,*) 'No analysis is made before stopping' 
              langul = .false.
              ispati =  0 
              numovl = 0 
              nitend = 1
            elseif (niterm.eq.2) then 
              write(nconso,*) 'New iion: 0'
              iion = 0 
            else 
              write(nconso,*) 'New niter: ',niterm
              nitend=niterm
            endif
 998        continue
            close(7)
            if (n.gt.nitend) lstop=.true.

! see if we have time for 2.00 more iterations (the last bit is for the
! analysis)
            if (uppcpu.gt.0.0d0.and.&
                uppcpu*3600.lt.-thisit+2.00d0*previt) then
               lstop=.true.
               write(nconso,*) 'Running out of time. ',&
                    'No more iterations taken. End niter: ',n-1
            endif
         endif

2090     continue

#ifdef PARAL
!        Send flags from master to all slaves
         call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)

         if (lupdio) then
!          Multicast updated ionic positions to all slaves
           call msposi (nconso, posion, nions, nspec)
         endif
#endif PARAL

! now that also the slaves (if running in parallel) know about lionmv:
         if (n.ne.1.and.lionmv) then
            nmov=n-1
         endif

         if (lstop) goto 999

! Output that new iteration started

    
         write(nconso,2100) n
2100     format ( 66("-"),/,20(">"),1x,"MAIN LOOP ITERATION ", i4,&
               1x,20("<"),/,66("-") )
         call uflush(nconso)

! calculate ion-ion energy and force contributions

         if (lupdew) then
            call uttime(time)
            timer(TEWALD)=timer(TEWALD)-time(1)

           call ewald_madl(posion,volc,nions,recc,&
            dirc,nspec,nionsp,ngx,ngy,ngz,lpctx,lpcty,lpctz,&
            vmadl,ew_force,enewa,nconso,icharge=icharc)


            lupdew=.false.
            call uttime(time)
            timer(TEWALD)=timer(TEWALD)+time(1)

!           use new ewald routine
            natom = 1
            do nsp = 1,nspec
              do ni = 1,nionsp(nsp)
                ewifor(:,ni,nsp) = ew_force(:,natom)
                natom = natom + 1
              enddo
            enddo                                                           

            lupdew=.false.
         endif

         lharri=.true.

         call inidir(ngx,ngy,ngz,nplwv,recc,volc,lpctx,&
                     lpcty,lpctz,dirdat)      

!        Damped-density algorithm

!        Eigenvalue tolerance
 
         if (resmax .gt. 1.0d-5 .and. nextend .eq. 0) then
!          Too large maximum residual, yet no states were updated.
!          In order to prevent stagnation, we reduce the tolerance
           tolerance = 0.5 * min (tolerance, resmax)
           write (nconso,*) 'libtos: nextend.eq.0, setting tolerance=',&
             tolerance
         endif

!        Algorithm: tolerance should not exceed ediff (Etotal difference)
!        but small tolerance needed only when close to selfconsistency
!        (when ediff is small).
         if (abs(ediff) .lt. 0.1) then
!          If resmax < tolerance, then decrease tolerance.
           tolerance = min (tolerance, abs(ediff), 0.5*abs(resmax))
         endif

!        A tolerance of 1.0d-5 is the smallest that should be used,
!        since we cannot converge eigenvectors any better.
         tolerance = max (tolerance, 1.0d-5)

         if (idebug.gt.0) then 
           write (nconso,2101) entot, ediff, tolerance, resmax
2101       format (' libtos: entot, ediff, tol, resmax=', &
             f15.5,g15.5,g13.6,g13.6)
         endif

!        Count no. of subspace extensions, and maximum residual found
         nextend = 0
         resmax = -1.0d10
         call damden(&
#          include H_TIMES_B_ARGS
! Array dimensions and constants/flags
           ,nsymax,nkpnum,&
           idebug,mmaxx,&
! integer
           nmsfft,nmstyp,icharg,nlnum,mmax,&
           nkpibz,nkpunf,nplwkp,lpctx,lpcty,&
           lpctz,ndcbyt,ireset,iscxc,&
! double precision
           tolerance, resmax, nextend,&
           wfkine,rdensr,rho_rad,rho_core,dirdat,rdnsum,&
           vxc,dnlg0,&
           gkp123,vkpt,wtkpt,sizham,deriv,eigen,&
           eigold,rlog,psp,phir2v,radius,phiatm,&
           forwrk,fnleif,ffield,occ,vnl,&
           vext,&
! complex*WF_PRECISION
           cptwfp,&
! complex*16
           cvion,&
! logical
           lkpinv,lkpnew,ldonkp,ldelin, lstop,&
! character
           wffile,&
           wfsetu,timer,dip0,extfie,dipmix,endipc,&
           zdip,enalp,enpot,encons,lworkp,extpot,idipol,width,occmix,&
           enfrec,entrpy,smethod,eneig,ndiapb,enion,entot,&
           enkin,envnl,enewa,lmastr,numsym,ocvar,&
           enxcc,enhac,nxc,excdat,xcfdat,xcpdat,enxc,&
           n,lupdks,lupdio,lupdch,lupdsu,lupdki,lupdnl,nconso,&
           recc,ecut,nelect,efermi,reci,&
           g2max,nplwk0,luns2s,&
           ldenrd,noffs0,damconv,lspsi,rmagmo,&
           magic)

         if (.not. lmastr) goto 2610

! calculate the free-energy correction due to the finite electroni temp.
! (first iteration only)

      if (lfermi .and. n.eq.1) then
         call eferop(nbands,nkprun,occ,wtkpt,eigen,efermi,&
              width,0.0d0,enfrec,entrpy,smethod,nconso,nelect,ocvar,nspin)
      endif

      call sumvnl(eneig,nkprun,nbands,eigen,wtkpt,occ)

      call sumpot(enpot,nplwv*nspin,rdensr,cveff,nspin)
      ! call sumpot(envion,nplwv*nspin,rdensr,cvion,nspin)
      call sumkin(enkin,nkprun,nbands,wfkine,wtkpt,occ)
      call sumvnl(envnl,nkprun,nbands,vnl,wtkpt,occ)
      if (lharri) then
         entot=eneig+            enhac+enxcc+enewa+encons
      else
         entot=enkin+envnl+enpot+enhac+enxcc+enewa+encons
      endif

      call uttime(time)
      timer(TNONSC)=timer(TNONSC)-time(1)

!     get the total energy without the selfconsistent 
!     exchange-corelation energy 
      if (iscxc.eq.1) then 
!       calculate enxc because it is not returned correctly 
!       from call of calcxc then lxcpot = true
        lxcpot = .false.
        call calcxc(1,lxcpot,nspin,nconso,rdensr,rho_core,&
         volc,recc,lpctx,lpcty,lpctz,&
         dummy,enxc,vxc,nxc,excdat,xcfdat,xcpdat,&
         cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer )
      endif

      enraw = entot + enfrec - enxc

!----------------------------------------------------------------------
! Calculate the non self consistent exchange-correlation energies

! Get the non self consistent Perdew Zunger LDA
      lxcpot = .false.
      call calcxc(1,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        dummy,enxcpz,ex,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
      exc_and_corr_all(1,:) = 0.0d0   ! not implemented for PZ LDA

! Get the non self consistent LDA and GGA exchange correlation energies
! Note that for lxcpot=false the VWN exchange energy is returned in 
! the exchange-corr. correction energy.
      if ((iscxc.eq.2).or.(iscxc.eq.3).or.(damconv.lt.0.25))&
       call calcxc(2,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        enxvwn,enxcgg,ex2,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
      exc_and_corr_all(2,1) = ex2(1)
      exc_and_corr_all(2,2) = enxvwn-ex2(1)
      exc_and_corr_all(3,1) = ex2(2)
      exc_and_corr_all(3,2) = enxcgg-ex2(2)

! Get the non self consistent PBE
      if ((iscxc.eq.4).or.(damconv.lt.0.25))&
       call calcxc(4,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        dummy,enxcpbe,ex,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
      exc_and_corr_all(4,1) = ex
      exc_and_corr_all(4,2) = enxcpbe-ex

! Get the non self consistent revPBE
      if ((iscxc.eq.5).or.(damconv.lt.0.25))&
       call calcxc(5,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        dummy,enxcrevpbe,ex,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
      exc_and_corr_all(5,1) = ex
      exc_and_corr_all(5,2) = enxcrevpbe-ex

! Get the non self consistent RPBE98
      if ((iscxc.eq.6).or.(damconv.lt.0.25))&
       call calcxc(6,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        dummy,enxcrpbe,ex,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
      exc_and_corr_all(6,1) = ex
      exc_and_corr_all(6,2) = enxcrpbe-ex

      call uttime(time)
      timer(TNONSC)=timer(TNONSC)+time(1)

      if (n.eq.1) then
         write(nconso,2607)'TR0',&
          '      E(free) ','      E(kin)  ','      E(pot)  ',&
          '      E(hac)  ','      E(xcc)  ','      E(vnl)  ',&
          '      E(ewa)  ','      E(alp)  ','      E(eig)  ',&
          '      E(ext)  ','      E(zero) ',&
          '      E(tot)  ','      E(dipc) '
         write(nconso,2607)'EXC',&
          '     Exc_pz  ','     Exc_gga ','     Exc_vwn  ',&
          '     Exc_pbe ','   Exc_revPBE','   Exc_RPBE' 
 2607    format(1x,a3,17a13)
      endif
      write(nconso,2605) 'TR1',entot+entrpy,enkin,&
           enpot,enhac,enxcc,envnl,enewa,enalp,eneig,&
           enion,entot+enfrec,entot,endipc
      write(nconso,2605) 'EXC',enxcpz,enxcgg,enxvwn,enxcpbe,&
                               enxcrevpbe,enxcrpbe
 2605 format(1x,a3,17f14.6)

!-------------------------------------------------------------------------
      call uttime(time)
      if (n.eq.1) then
         write(nconso,*)&
         'TOT:  CPU time                  Total energy'
         write(nconso,*)&
         'TOT:                   LDA            GGA            LDA'
         write(nconso,*)&
         'TOT:             PerdewZunger   PerdewWang91      VosWilNus'
         if (iscxc.eq.1) then
            write(nconso,*)&
         'TOT:                 selfcons   non-selfcons   non-selfcons'
         else if (iscxc.eq.2) then
            write(nconso,*)&
         'TOT:  CPU time   non-selfcons   non-selfcons       selfcons'
         else if (iscxc.eq.3) then
            write(nconso,*)&
         'TOT:  CPU time   non-selfcons       selfcons   non-selfcons'
         else
            write(nconso,*)&
         'TOT:  CPU time   non-selfcons   non-selfcons   non-selfcons'
         endif
         write(nconso,*)&
         'TOT:   seconds          eV             eV             eV'
         write(nconso,*)&
         'DFT:  CPU time                           Total energy'
         write(nconso,4505)&
         'DFT:                 LDA            GGA-II           PBE   ',&
         '      revPBE          RPBE98       '
         write(nconso,4505)&
         'DFT:             VoskoWilkNus   PerdewWang91   PerdewBurkeE',&
         '   PBE_kap=1.245  PBE_0.804exp   '
         if (iscxc.eq.2) then
            write(nconso,4505)&
         'DFT:  CPU time       selfcons   non-selfcons   non-selfcons',&
         '   non-selfcons   non-selfcons'
         else if (iscxc.eq.3) then
            write(nconso,4505)&
         'DFT:  CPU time   non-selfcons       selfcons   non-selfcons',&
         '   non-selfcons   non-selfcons'
         else if (iscxc.eq.4) then
            write(nconso,4505)&
         'DFT:  CPU time   non-selfcons   non-selfcons       selfcons',&
         '   non-selfcons   non-selfcons'
         else if (iscxc.eq.5) then
            write(nconso,4505)&
         'DFT:  CPU time   non-selfcons   non-selfcons   non-selfcons',&
         '       selfcons   non-selfcons'
         else if (iscxc.eq.6) then
            write(nconso,4505)&
         'DFT:  CPU time   non-selfcons   non-selfcons   non-selfcons',&
         '   non-selfcons      selfcons' 
         else
            write(nconso,4505)&
         'DFT:  CPU time   non-selfcons   non-selfcons   non-selfcons',&
         '   non-selfcons   non-selfcons'
         endif
         write(nconso,4505)&
         'DFT:   seconds          eV             eV             eV   ',&
         '          eV            eV   '
      endif

 4500    format(1x,a4,f10.1,6f15.4)
 4505    format(1x,a,a)
 4510          format(1x,a4,f10.1,'               ',2f15.4)

!---------------------------------------------------------------------------------------
!
      if (abs(rmagmo).le.1.d-10) then
          lmagmo=.false.
      else
          lmagmo=.true.
      endif

      txttot='TOT:'
      if (lmagmo) txttot='TOTm'
      if (lnonselfconsistent) txttot='TOTn'
      txtdft='DFT:'
      if (lmagmo) txtdft='DFTm'
      if (lnonselfconsistent) txtdft='DFTn'
      entot_all_xc(1)=enraw+enxcpz
      entot_all_xc(2)=enraw+enxvwn
      entot_all_xc(3)=enraw+enxcgg
      entot_all_xc(4)=enraw+enxcpbe
      entot_all_xc(5)=enraw+enxcrevpbe
      entot_all_xc(6)=enraw+enxcrpbe

      if (nspin.eq.1) then
         write(nconso,4500) txttot,time(1)+time(2),&
            entot_all_xc(1),entot_all_xc(3),entot_all_xc(2)
      else
         write(nconso,4510) txttot,time(1)+time(2),&
                     entot_all_xc(3),entot_all_xc(2)
      endif
      write(nconso,4500) txtdft,time(1)+time(2),&
       entot_all_xc(2),entot_all_xc(3),entot_all_xc(4),&
       entot_all_xc(5),entot_all_xc(6)

!---------------------------------------------------------------------------------------

      etota2=etota1
      etota1=etota0
      etota0=entot
      call uflush(nconso)
      call uttime(time)
      timer(TLFORC)=timer(TLFORC)-time(1)

      if (damconv.lt.0.25) then 

!     tidyup must be call to recalculate vxc, used in forloc,
!     overwritten in  non self. consistent call to pw
      call tidyup(nplwv,npwxc,rdensr,rho_core,&
               enhac,dirdat,cveff,volc,enxcc,&
               enxc,excdat,xcfdat,xcpdat,nxc,cvion,&
               recc,lpctx,lpcty,lpctz,vxc,dnlg0,cwork1,&
               iscxc,nspin,ngx,ngy,ngz,ngxs,ngys,ngzs,&
               cveff_soft,deltav,&
               ngdens_soft,nrplwv,ipwpadG_soft,nconso,idebug,timer)               

! symfor: symmetrize nl-forces
! forloc: calculate forces from local part of pseudopotential, pseudo core, 
!         and also calculated correction to force when using the 
!         Harris functional.
! iforce: sum to total forces and print out

      call symfor(nconso,nmsfor,dirnms,fnleif,nionsp,forwrk,nions,&
           numsym,nmstyp,nspec)
      call symfor(nconso,nmsfor,dirnms,ewifor,nionsp,forwrk,nions,&
           numsym,nmstyp,nspec)
      call symfor(nconso,nmsfor,dirnms,ffield,nionsp,forwrk,nions,&
           numsym,nmstyp,nspec)
      call forloc(ngx,ngy,ngz,nions,nplwv,posion,rdensr,rdnsum,&
           recc,eifor,lpctx,lpcty,lpctz,lpctx,lpcty,lpctz,&
           nspec,nionsp,psp,rho_rad,vxc, psgmax,icharg,npspts,&
           volc,nspin)
      call symfor(nconso,nmsfor,dirnms,eifor,nionsp,forwrk,nions,&
           numsym,nmstyp,nspec)

      endif 

      if (n.eq.1) then
         write(nconso,*) 'FOR:  Ion    F_x      F_y      F_z'
         write(nconso,*) 'FOR:   #     eV/A     eV/A     eV/A'
      endif
      call iforce(fortot,eifor,ewifor,fnleif,nions,nionsp,nspec,&
           nconso,0,idebug,ffield,rmove,forvar,forsha,forall)
      call uttime(time)
      timer(TLFORC)=timer(TLFORC)+time(1)

      if (iion.eq.0) then
           sizeforce = 0
           forvar = 0
           forsha = 0
      endif

      if (       ocvar.lt.occupation_convergence & 
           .and. abs(etota0-etota1).lt.energy_convergence & 
           .and. damconv.lt.density_convergence &
           .and. sizeforce.lt.fallmax &
           .and. forvar.lt.fvarmax &
           .and. forsha.lt.fchgmax ) then
         concrit_one = concrit_one + 1
      else
         concrit_one = 0
      endif

      if (concrit_one.ge.repeated_convergences) then
         if (lnonselfconsistent) then
            lstop = .true.
            write(nconso,*) 'TOTn:  Non selfconsistent run converged'
         else
            if (iion.eq.0) then
!              stop at the beginning of the next iteration
               write(nconso,4600) abs(etota0-etota1)
 4600          format(1x,'TOT:      stopping',f12.6)
               lstop=.true.
            elseif (iion==50) then
!              external relaxation convergence control for (iion==50)
               lstop=.false.
            else
!              stop at the beginning of the next iteration
               write(nconso,4601) sizeforce,entot+enfrec
 4601          format(1x,'STEP     stopping',f12.6,1x,f13.6)
               lstop=.true.
            endif
         endif
      endif

      if (n.eq.1) then
         write(nconso,*) 'convergence:    '
         write(nconso,*) 'convergence:                Change in'

         if (.not.lnonselfconsistent) then 
           write(nconso,*) 'convergence:       Density       Occup.     Energy    |F|      dF/F    |Abs. force|'
         else
           write(nconso,*) 'convergence:    Eigen Value      Occup.     Energy    |F|      dF/F    |Abs. force|'
         endif
      endif
      write(nconso,2621)' convergence:  Damp',damconv,ocvar,(etota0-etota1),forvar,forsha,forall

 2621 format(a13,1x,2f13.6,f16.9,3f8.3)
      call uflush(nconso)

! integrate the equations of motion for the ions
! iion   > 1 if ion-positions are relaxed
      if (.not.lstop.and.iion.ge.1 .and. (.not.lnonselfconsistent)&
          .and. n.ge.iwait .and. n.le.niter-5 .and.&
          forvar.lt.fvarmax.and. forsha.lt.fchgmax.and.&
          (.not.lharri.or.damconv.le.density_convergence)) then
          concrit_two = concrit_two + 1
      else
          concrit_two = 0
      endif

      if (concrit_two.ge.repeated_convergences) then
          iwait=n+3

! update the netCDF history (always for dynamic calcs.)
          call update_netcdf_history(nions,nspec,nkprun,&
               nbands,nionsp,kspin,nspin,dirc,posion,efermi,&
               entot+enfrec,entot+entrpy,rvelo,fortot,eigen,occ,entot_all_xc, &
               exc_and_corr_all) 
                                   
! -----> end of this time step <-----

          call ionmov(posion,fortot,rvelo,entot+enfrec,sizeforce,&
                      fallmax,lionmv,&
#                     include "ions_args.h"
                      idebug,nconso)                   

! update local and non-local terms plus the ewald contributions,
! if ions were moved (lionmv was set by ionmov)

          if (lionmv) then
             lupdio=.true.
             lupdnl=.true.
             lupdew=.true.
             rmm_init = .true.   ! becp overlab should be recal. in setup_bhb_bsb
             do nkp = 1,nkprun
               linitvkb(nkp) = .true.
             enddo
          endif                  ! if (lionmv)

          concrit_one = 0        ! This line ought to be unnecessary ?
          concrit_two = 0
         
      endif                      ! if (.not.lstop ...)

! update charge density and sums

      lupdch=.true.
      lupdsu=.true.

! Calculate Magnetic moment for a spin-polarized calculations 
      if (nspin.eq.2) then 
          call magnetic(nconso,ngx,ngy,ngz,nspin,nplwv,volc,dirc,&
                        rdensr,nspec,nionsp,nions,posion)
      endif

2610  continue

#ifdef PARAL
!     Multicast efermi, eigenvalues and occupancies to all slaves
      call mseigo (nconso, efermi, eigen, eigold, occ,&
        nbands, nkprun)
!        Send flags from master to all slaves
         call msflag(nconso,charge_density_initialized,lupdch,lupdio, &
                     lupdki,lupdnl,lionmv)
#endif PARAL

       if (lupdio) rmm_init = .true.

! Keep track of how long time an iteration takes
         call uttime(time)
         thisit=thisit+time(1)

! this is the end of the loop for one electronic update
 1000 continue

  999 continue

      call density_put(rdensr,rho_core,ngx,ngy,ngz,nspin,lmastr)                ! Write densities to netcdf file.
      call potential_put(cveff,ngx,ngy,ngz,nspin,lmastr)                        ! Write Effective potential to NetCDF.
      call elspot(nplwv,cwork1,rdensr,ngx,ngy,ngz,dirdat,cvhartree, & 
                  cvion,nspin)                                                  ! Calculate the electrostatic potential.
      call els_potential_put(cvhartree,ngx,ngy,ngz,nspin,lmastr)                ! Write electrostatic potential to NetCDF (including ions and dipole moment).

! ---------------- ANALYSIS PART OF CODE -------------------------------

      write (nconso,*) 'ANALYSIS PART OF CODE'
      call uflush (nconso)

! ----------------------------------------------------------------------
! calculate stress
! ----------------------------------------------------------------------
      if (output_totalstress_to_netcdf) then 
      call uttime(time)
      timer(TSTRESS)=timer(TSTRESS)-time(1)
      call kinetic_stress(cptwfp, nplwkp,occ,wtkpt,ndcbyt,ldonkp,&
#include "apply_h_args.h"
                          ,timer   )

      call hartree_stress(rdensr,nspin,ngx,ngy,ngz,dnlg0, &
                          -2.0d0*enhac,volc)

!     exchange-correlation stress
      cal_xc_gga_stress = .true.
      call calcxc(iscxc,.true.,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        enxvwn,enxcgg,ex2,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)

      call xc_stress(rdensr,vxc,enxc,nspin,ngx,ngy,ngz,dnlg0,volc, & 
                        rho_core, nions,nspec,nionsp, posion, dirc )
      cal_xc_gga_stress = .false.


      sumcvion = dble(sum(cvion(1:nplwv)))
      cvion(1:nplwv) = cvion(1:nplwv) - sumcvion
      call localpsp_stress(rdensr,nspin,ngx,ngy,ngz,nions,nspec,nionsp, &
                           volc,posion,dirc,cvion,dnlg0,enalp)


      call ewald_madl(posion,volc,nions,recc, &
            dirc,nspec,nionsp,ngx,ngy,ngz,lpctx,lpcty,lpctz, &
            vmadl,ew_force,enewa,nconso,icharge=icharc, &
            calculate_stress=.true.)

      call nonlocal_stress(cptwfp,nplwkp,ndcbyt,ldonkp, &
#include                  "apply_h_args.h"
                          ,occ,wtkpt,eigen,timer )

      call us_stress(&
                 nplwv,nrplwv,nrplwv_global,nions,nspec, &
                 nionsp,nbands, nspin,nkprun,kspin,wtkpt,ldonkp, &
                 posion,dirc,volc, cveff, &
                 ngx,ngy,ngz, &
                 nconso & 
#ifdef PARAL
                 ,&
#include PARAL_ARGS
#endif
                 ,timer)

      call get_total_stress(lmastr, nconso)
      call uttime(time)
      timer(TSTRESS)=timer(TSTRESS)+time(1)
      endif  ! if output_totalstress_to_netcdf
! ------------------------------------------------------------------------


      write(nconso,*) 'EIG   Nb  Nkpt   Eigen value      Occupation '
      write(nconso,2500)&
           ((i,j,eigen(i,j)-efermi,occ(i,j),i=1,nbands),j=1,nkprun)
 2500 format(1x,'EIG',2i4,'  ',2f16.10)   

      ispati = 0 
#ifdef PARAL
      if (par_pw_np.gt.1) then 
!       can not do atomic and multicenter projection yet for 
!       plane wave par. 
        langul = .false.   
        write(nconso,*) 'AO_LDOS: Cannot calculation Atom Projected DOS for '
        write(nconso,*) 'AO_LDOS: planewave parallel calculations'

        numovl = 0 
      endif
#endif PARAL 
        

      if (.not. lmastr) goto 1020

! Allways update the netCDF history 
! Will dublicate last iteration for dynamic calculation which are 
! converged
     call update_netcdf_history(nions,nspec,nkprun,nbands,&
         nionsp,kspin,nspin,dirc,posion,efermi,entot+enfrec,entot+entrpy,&
         rvelo,fortot,eigen,occ,entot_all_xc,&
         exc_and_corr_all) 

! in a dynamic calculation output the real space positions 
      if (iion.gt.0) &
          call write_realpos(nconso,nspec,nions,nionsp,&
                     atomic_number,posion,dirc,rmove,rvelo) 

! Display the x-y averaged charge density, electrostatic, and
! effective potential

!     if (niter.gt.0)
!    &  call chdave(nconso,ngx,ngy,ngz,cveff,rdensr,
!    &      rho_core,dirc,nplwv,npwxc,volc,enxcc,enxc,
!    &      excdat,xcfdat,xcpdat,nxc,efermi,
!    &      recc,lpctx,lpcty,lpctz,vxc,dnlg0,cwork1,
!    &      iscxc,nspin,dirdat,cvion,idebug,timer)


! Now for a final k-point loop, where some analysis is done and the
! the requested properties are calculated and printed out

1020  call uttime(time)
      timer(TANALYS)=timer(TANALYS)-time(1)

!     First: zero the vxc array, which must be zero for summing
!     up during the k-point loop.
      vxc = dcmplx(0.0d0,0.0d0)

      if (langul) then 
         allocate(adostmp(nener,9,nions,ncut,nspin))
         allocate(aoldos(nener,9,nions,ncut,nspin))
         aoldos = 0.0d0
         overls = 0.0d0; overlp = 0.0d0 ; overld = 0.0d0
      endif 


      do 1200 nkp=1,nkprun
        if (.not. ldonkp(nkp)) goto 1200

! Get the wave functions for the nkp'th k-point
         call  wfswap_get(cptwfp,ndcbyt,nkpeff,&
#include                 "apply_h_args.h"
                         )

! Derive the single center (atomic) orbital density of states

         if (langul) then
           call angula(cptwfp(1,1,nkpeff),nplwkp(nkp),&
                       overls(1,1,nkp),overlp(1,1,1,nkp),&
                       overld(1,1,1,nkp),adostmp,&
                       nlnum,mmaxx,mmax,&
                       rlog,radius,phiatm,phir2v,&
                       efermi,recc,wtkpt,cvion,&
                       occ(1,nkp),eigen(1,nkp),nkpnum,nkpibz,nkpunf,&
                       lkpinv,ncut,nener,wangul,enemin,enemax,ecut,rcutoff,&
#                      include "apply_h_args.h"
                       ,timer,nconso)

!          Sum up the total local density of states in aoldos
           call add (aoldos(1,1,1,1,kspin(nkp)), adostmp, &
                     ncut*nions*9*nener)
         endif

! Derive the multi center (molecular) orbital density of states
         call mulcen(nplwkp(nkp), cptwfp(1,1,nkpeff),mmaxx,nconso,&
                nlnum,mmax,rlog,radius,phiatm,phir2v,&
                occ(1,nkp),eigen(1,nkp),efermi,recc,&
                nkpnum,nkpibz,nkpunf,lkpinv,kspin(nkp),ecut,&
#include       "apply_h_args.h"
               ,timer)


#ifdef SERIAL
! Only for the serial version:
! Write out the wave functions for the nkp'th k-point.
! Store wavefunctions to permanent (netCDF) file 

!        output wf file to permanent file
         call netcdf_write_wf(nkprun,nspin,nbands,nrplwv,&
               kspin,nkp,cptwfp(1,1,nkpeff))
#endif SERIAL

 1200 continue

#ifdef PARAL
!     Write the slaves' wavefunctions to disk,
!     now that the master's wavefunction is not needed any longer.
!     (Note: lstowf is always false on the slaves).

      if (niter .gt. 0) then
        write (nconso,*) 'PAR: Gathering slave wavefunctions on master'
        call uflush (nconso)

!       setup dimension for work array in write_wf
        wfdim = 1
        if (par_pw_np.gt.1) wfdim = nrplwv_global

        do nkp = 1, nkprun

! Write the wave functions for the nkp'th k-point

           call write_wf(&
              cptwfp,ndcbyt,&
               wfdim, kphost,ldonkp,lmastr,nplwkp,&
#             include "apply_h_args.h"
              ,timer,nconso )                               
 

        enddo   
      endif 
#endif PARAL

! Superfluous: setup dirdat once more, since it was used as a workspace
! in calls to doszen (above).  Just in case the program
! is changed later, we don't want to create a mess:
      call inidir(ngx,ngy,ngz,nplwv,recc,volc,lpctx,lpcty,lpctz,dirdat)

! Print out results from loop over k-points

! Print the single center (atomic) orbital density of states

      if (langul) then
#ifdef PARAL
!       Slaves send partial values back to master.
!       Master receives and sums up contributions.
!       Here, we use smdens() for a different array.
!       We can use mssum() in stead, if work-arrays are large enough.
!       Barrier synchronization (send flags: a dummy action) is
!       done in between the calls of smdens to avoid mixing
!       of the various packages sent from the slaves
        call smdens (nconso, aoldos, cwork1,ncut*nions*9*nener*nspin)
        call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)
        call smdens (nconso, overls, cwork1,1*nions*ncut*nkprun)
        call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)
        call smdens (nconso, overlp, cwork1,3*nions*ncut*nkprun)
        call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)
        call smdens (nconso, overld,cwork1, 5*nions*ncut*nkprun)
        call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)
#endif PARAL
        if (lmastr)&
          call anguwr (kspin, ncut, nkprun, nions, nener,&
           enemin, enemax, overls, overlp, &
           overld, aoldos,nspin)
      endif

! Print the multi center (molecular) orbital density of states
      call mulcwr(lmastr)

#ifdef PARAL
!       setup blocking send
        call msflag (nconso, lstop, lupdch, lupdio, lupdki, lupdnl,&
           lionmv)
#endif PARAL

!      write becp overlab to netcdf file
       call netcdf_write_becp(ldonkp,lmastr,&
#                   include "apply_h_args.h"
                            )                       

       call wannier(lmastr,vkpt,&
#include            "apply_h_args.h"
                    ,timer)

!     Get the ensemble exchange correlation energies
      ensemble_energies(1) = enxcpbe
      do iscxc=7,10
       call calcxc(iscxc,lxcpot,nspin,nconso,rdensr,rho_core,&
        volc,recc,lpctx,lpcty,lpctz,&
        dummy,enxc,ex,vxc,nxc,excdat,xcfdat,xcpdat,&
        cwork1,ngx,ngy,ngz,nplwv,dnlg0,npwxc,timer)
        ensemble_energies(iscxc-5) = enxc
      enddo
      call netcdf_write_ensemble_energies(ensemble_energies,lmastr)

      call uttime(time)

! ----------------------------------------------------------------

      timer(TANALYS)=timer(TANALYS)+time(1)

! -----------------------------------------------------------------------

      nitend = n - 1
      if (n .le. 1) nitend = 1
      write(nconso,*) 'TIM: Number of iterations = ', nitend
      call uflush(nconso)

!     Write out the timing info
      call print_timer (nconso, timer, nitend)

#ifdef PARAL
      do irank = 1,par_np
!       get rank for this process
        call par_rank_world (myrank,&
#include PARAL_ARGS
        ,nconso)
        if ((myrank.eq.irank).and.(.not.lmastr)) then 
             call mssendok(nconso,irank-1)
        endif
        if ((myrank.ne.irank).and.(lmastr)) then 
             call mssendok(nconso,irank-1)
        endif
      enddo
      call wait_all(nconso)
#endif PARAL
!     Close the output file (important for PARMACS slaves)
      call clexit (nconso)


      return
      end

!=======================================================================
      subroutine libprt

      use run_context
      use ewald
      use kp_analysis 
      use g_vectors

      write(nconso, 400 )
      
      call utdate(nconso)
      call uthost(nconso)

      write(nconso, 500)&
       '==============================================================',&
       '                                                              ',&
       ' Welcome to:                                                  ',&
       '                                                              ',&
       '                         D A C A P O                          ',&
       '                                                              ',&
       ' The plane wave - pseudopotential program                     ',&
       '                                                              ',&
       ' Version:                                                     ',&
         "   "//revision_date                                          ,&
         "   "//revision_number                                        ,&
       '                                                              ',&
       '==============================================================',&
       '                                                              ',&
       '  Recent contributors:                                        ',&
       '                                                              ',&
       '  2002-  J. Rossmeisl   (Electrostatic Decoupling)            ',&
       '  1999-  A. Christensen (Fortran90 modularization,            ',&
       '                         netCDF interface )                   ',&
       '  1999-  T. Bligaard    (Fortran90)                           ',&
       '  1996   Y. Morikawa    (Constrained dynamics)                ',&
       '  1996   A.C.E.Madsen   (MD min)                              ',&
       '  1996-  L.B.Hansen     (core corr.)                          ',&
       '  1996   L.Bengtsson    (fast selfconsis. occ.s,              ',&
       '                         power expansion method)              ',&
       '  1995   J.J.Mortensen  (selfconsis. GGA)                     ',&
       '  1995-  O.H.Nielsen    (parallellization + opt.)             ',&
       '  1990-  B.Hammer                                             ',&
       '                                                              '

      write(nconso, 400 )
      write(nconso, *) ' '

 400  format( 1x, 64("&") )
 500  format( 50(1x, "&", 1x, a60, 1x, "&", /) )

  
      write(nconso,*)' '

      call report_FILE_IO()

      call uflush(nconso)

      return
      end

      subroutine add (array1, array2, n)

!     Add the real*8 array2 to array1
!     (Fortran-90: array1 = array1 + array2)

      real*8 array1(n), array2(n)
      integer n
#ifdef ESSL
      call dvea (n, array1, 1, array2, 1, array1, 1)
#else 
      integer i
 
      do 100 i = 1, n
        array1(i) = array1(i) + array2(i)
100   continue
#endif ESSL
      return
      end

      subroutine sub (array1, array2, n)
 
!     Subtract the real*8 array2 from array1
!     (Fortran-90: array1 = array1 - array2)
 
      real*8 array1(n), array2(n)
      integer n
#ifdef ESSL
      call dves (n, array1, 1, array2, 1, array1, 1)
#else
      integer i
 
      do 100 i = 1, n
        array1(i) = array1(i) - array2(i)
100   continue
#endif ESSL
      return
      end

      subroutine geteff(nkp,nkpmem,nconso,nkpeff)
! Decompose the nkp into a file-index and an effective k-point index

      implicit none

      integer nkp,nkpmem,nconso,nkpeff
      integer nfil

      nfil  =int((nkp-1)/nkpmem)+1
      nkpeff=mod(nkp-1,nkpmem)+1
      if (nkp.ne.(nfil-1)*nkpmem+nkpeff) then
         write(nconso,*)'Unexpected in libtos calculating nkpeff'
         call clexit(nconso)
      endif
      return
      end

! ===========================================================
      subroutine clexit (idum)

      use run_context

!     Exit the program gracefully 
!     remove idum, when nconso is removed from arg lists


      integer idum  
      logical yesno

#ifdef PARAL
! --- inclusion below is only to access lmaster in "ms.h"
#include "ms.h"      
      if (lmaster.and.netCDF_copy_to_final_file) &
        call system_opsys_interface('mv '//&
                       trim(netCDF_output_filename)//" "// &
                       trim(netCDF_output_final_filename))
#else
      if (netCDF_copy_to_final_file) &
        call system_opsys_interface('mv '//&
                       trim(netCDF_output_filename)//" "// &
                       trim(netCDF_output_final_filename))
#endif PARAL


      write(nconso,*) 'clexit: exiting the program'
      call uflush(nconso)


#ifdef PARAL
      call msexit (nconso)
#endif PARAL


!     remove swap files - slightly dirty check
!                         make sure filenames are nonempty

      yesno = .false.
      if (len_trim(qrad_swap_filename) > 0) &
       inquire(file=trim(qrad_swap_filename), exist=yesno)
      if (yesno) call system_opsys_interface(&
                      'rm '//trim(qrad_swap_filename))

      yesno = .false.
      if (len_trim(wf_swap_filename) > 0) &
       inquire(file=trim(wf_swap_filename), exist=yesno)
      if (yesno) call system_opsys_interface(&
                      'rm '//trim(wf_swap_filename))

      yesno = .false.
      if (len_trim(vkb_swap_filename) > 0) &
       inquire(file=trim(vkb_swap_filename), exist=yesno)
      if (yesno) call system_opsys_interface(&
                      'rm '//trim(vkb_swap_filename) )    

#ifdef VPP500
!     For debugging on VPP-500, when output is incomplete:
!     Write dummy output in order to flush I/O buffer
!      do  100 i = 1, 100
!        write (nconso,110) i
!110     format ('DUMMY', 70x, i3)
!100   continue
#endif VPP500

!     Close the output file
!      close (nconso)

! When running in external mode, tell python that dacapo is ready to die:
      if (ExternalIonMotion_script.ne."") then
         write(nconso,*) 'clexit: sending signal to python and waiting '
         call uflush(nconso)
         call system_opsys_interface(trim(ExternalIonMotion_script))
         call system_opsys_interface('sleep 10')
      endif

      call exit(0)
      return
      end

!========================================================
      subroutine abort_calc(iunit, message)
! -------------------------------------------------------
!    invoke clexit, after writing message 
! -------------------------------------------------------
      implicit none
      integer         iunit
      character*(*)   message
      
      write(iunit,*) "abort_calc: ", trim(adjustl(message))
      call clexit(iunit)

      end subroutine abort_calc


!========================================================
      subroutine libtos_allocate_error(iunit,icheck, message)
! -------------------------------------------------------
!     check icheck from allocate call, and invoke clexit 
!     if icheck > 0
! -------------------------------------------------------
      implicit none
      integer         iunit,icheck
      character*(*)   message

      if (icheck.gt.0) then  
        write(iunit,*) 'libtos: allocate error. variable : ',message,&
           'error = ',icheck

        call clexit(iunit)
      endif
 
      end subroutine libtos_allocate_error
                                        

      subroutine print_timer (nconso, timer, nitend)

!     Write out the timing info

      use module_rmm_diss
      implicit none
      real*8 timer(*)
      integer nconso, nitend

!     Local vars
#include "etime.h"
      real*8 t

      call uttime(time)
      timer(TTOTAL)=time(1)+time(2)+0.001d0
      write(nconso,100) time(1),time(2),timer(TTOTAL)
100   format(' TIM: Seconds User:',f8.1,' System:',f8.1,' U+S:',f8.1)
      t = 0.0d0

      call print_t (nconso, 'Setup     ', TSETUP, timer, nitend)
      t = t + timer(TSETUP)

      if (timer(TTPAALG) .gt. 0.1d0) then
        call print_t (nconso, 'tpaalg    ', TTPAALG, timer, nitend)
        t = t + timer(TTPAALG)
      endif

      if (timer(TDAMDEN) .gt. 0.1d0) then
        call print_t (nconso, 'damden    ', TDAMDEN, timer, nitend)
        t = t + timer(TDAMDEN)
      endif

      call print_t (nconso, 'localF    ', TLFORC, timer, nitend)
      t = t + timer(TLFORC)

      call print_t (nconso, 'nsc energy', TNONSC, timer, nitend)
      t = t + timer(TNONSC)


      if (timer(TANALYS) .gt. 0.1d0) then
        call print_t (nconso, 'Analysis  ', TANALYS, timer, nitend)
        t = t + timer(TANALYS)
      endif

!     Time spent in libtos(), excepting the times in the above modules:
      timer(TLIBTOS) = timer(TTOTAL) - t
      call print_t (nconso, 'libtos    ', TLIBTOS, timer, nitend)

      write(nconso,*)'TIM:             ------  ---------------------'
      call print_t (nconso, 'Sum       ', TTOTAL, timer, nitend)
      write(nconso,*)'TIM:             ======  ====================='

      write(nconso,*)'TIM:'
      write(nconso,*)'TIM: Timing of some individual parts'

      call print_t (nconso, 'Chg_dens  ', TCHDENS, timer, nitend)

      call print_t (nconso, 'nonlocF   ', TNLFORC, timer, nitend)
      call print_t (nconso, '  -vkbloop', TUSFOR, timer, nitend)
      call print_t (nconso, '  -newdd  ', TNEWDD, timer, nitend)


      call print_t (nconso, 'Ewald      ', TEWALD,  timer, nitend)
      call print_t (nconso, 'cal_bec    ', TCALBEC, timer, nitend)
      call print_t (nconso, '-loop1     ', TCALBEC1, timer, nitend)
      call print_t (nconso, 'V_NL       ', THPSI,  timer, nitend)
      call print_t (nconso, 'V_NL1      ', THPSI1, timer, nitend)
      call print_t (nconso, 'V_NL2      ', THPSI2, timer, nitend)

      call print_t (nconso, 'xc1        ', TXC1, timer, nitend)
      call print_t (nconso, 'xc2        ', TXC2, timer, nitend)
      call print_t (nconso, 'tblochl1   ', TGABAS, timer, nitend)
      call print_t (nconso, 'tblochl2   ', TMODEL, timer, nitend)
      call print_t (nconso, 'tblochl3   ', TMADL, timer, nitend)
      call print_t (nconso, 'tblochl4   ', TPCOR, timer, nitend)
      call print_t (nconso, 'tblochl5   ', TFORCOR, timer, nitend)
      call print_t (nconso, 'Stress     ', TSTRESS, timer, nitend)
      call print_t (nconso, 'Orthogon  ', TORTHGN, timer, nitend)

!     Tpaalg timings
      if (timer(TTPAALG) .gt. 0.0d0) then
        write(nconso,*)'TIM:'
        write(nconso,*)'TIM: Timing of individual parts of tpaalg'
        call print_t (nconso, 'GillanRt  ', TGILLAN, timer, nitend)
      endif

!     Damden timings
      if (timer(TDAMDEN) .gt. 0.0d0) then
        write(nconso,*)'TIM:'
        write(nconso,*)'TIM: Timing of individual parts of damden'
!       Timing of damden itself, less the time spent in resmin
        timer(TDAMDEN) = timer(TDAMDEN) - timer(TRESMIN)
        call print_t (nconso, 'damden    ', TDAMDEN, timer, nitend)
!       individual parts
        call print_t(nconso, '-addusdens',TADDUSDENS,timer,nitend)
        call print_t(nconso, '-nonlocF  ',TNLFORC,   timer,nitend)
        call print_t(nconso, '-dipole   ',TDIPOLE   ,timer,nitend)
        call print_t(nconso, '-densmix  ',TDENSMIX  ,timer,nitend)
        call print_t(nconso, '-newd     ',TNEWD,timer,nitend)
        call print_t(nconso, '--ddot    ',TNEWDDT  , timer,nitend)
        call print_t(nconso, '-tidyup   ',TTIDYUP   ,timer,nitend)
        call print_t(nconso, '-init     ',TDAMINIT  ,timer,nitend)

        call print_t (nconso, 'H_diagonal', TRESMIN, timer, nitend)
        call print_t (nconso, '-subdia   ', TSUBDIA, timer, nitend)
        call print_t (nconso, '-apply_H  ', TAPPLYH, timer, nitend)
        call print_t (nconso, ' -vnlwav  ', TVNLWAV, timer, nitend)
        call print_t (nconso, ' -wf_FFT  ', TPOTXWF, timer, nitend)
        call print_t (nconso, '-Orthogon ', TORTH,   timer, nitend)
        call print_t (nconso, '-Updat_wf ', TUPDWF,  timer, nitend)
        call print_t (nconso, '-diag_HEEV', THEEV,   timer, nitend)
        call print_t (nconso, '-BLAS_ops ', TBLAS,   timer, nitend)
        call print_t (nconso, '-Restart  ', TRESTART,timer, nitend)
        call print_t (nconso, '-Eig. proj ',TEIGPROJ,timer, nitend)
        call print_t (nconso, '-Residual ', TRESI,   timer, nitend)
        write(nconso,200) nint(timer(NAPPLYH)),&
          timer(TAPPLYH)/nint(timer(NAPPLYH))
200     format(' TIM: Subroutine apply_H was called ', i5,&
          ' times (', f8.3, ' sec/call)')
      endif

!     timers for subproj
!     call disp_timers(timer,nconso)
      call disp_rmm_diis_timers(timer,nconso,nitend)

#ifdef PARAL
      call mstime (timer(TMESSPASS))
      call print_t (nconso, 'MessPass  ', TMESSPASS, timer, nitend)
#endif PARAL

      return
      end

      subroutine print_t (nconso, text, it, timer, nitend)

!     Print a single, nicely formatted timing
!     text ..... A short descriptive text, max 10 chars
!     it ....... The index into the timer array
!     timer .... The timer array
!     nitend ... The number of iterations

      implicit none
      character*(*) text
      real*8 timer(*)
      integer nconso, it, nitend

      if (timer(it) .gt. 0.0d0) write(nconso,100) text,&
        100.0d0 * timer(it) / timer(TTOTAL),&
        timer(it),&
        timer(it) / nitend
100   format(' TIM: ',a10,f7.1,'%',f11.1,' CPU seconds (',&
        f8.3, ' sec/iter)')

      return
      end


