#include "definitions.h"
#if  WF_PRECISION == 8
#define CZC_GEMV newczcgemv
#define CZC_GEMM newczcgemm
#endif WF_PRECISION

!#define USEZHEEVX 1
#ifdef PARAL
!     Name of parallel argument-list file
#define PARAL_ARGS "parallel_args.h"
!     Name of parallel argument-declaration file
#define PARAL_DECL "parallel_decl.h"
#endif
!
      subroutine eigsolveid (nconso)
      write(nconso,*) '@(#)eigsolve.F	1.13 7/1/99'
      return
      end

! #define DEBUG 1
!
!-------------------------------------------------------------------
!
      subroutine eigsolver(lgenev, n, m, lambda, psi, ldpsi,&
           nb, maxiter,ndiapb, tolerance,&
#include H_TIMES_B_ARGS
#ifdef PARAL_INCL
           ,&
#include PARAL_ARGS
#endif
           , nout, timer, info)
!
!     lgenev       TRUE if generalized eigenvalue problem (input)
!     n            dimension of problem (input)
!     m            number of eigenpair requested (input)
!     lambda       sorted array of eigenvalues (output)
!     psi          array of inital/final eigenvectors (input/output)
!     ldpsi        leading dimension of psi which mighe be > n (input)
!     nb           number of eigenpairs to iterate on simultaneously (input)
!     maxiter      max number of iterations of each eigenpair (input)
!     nmax         max dimension of subspace (input)
!     tolerance    tolerance for when to consider eigenpairs converged (input)
!     nout         device write output to (input)
!     timer        array of timers (input/output)
!     info         returns non zero if algorithm failed.
!
!     This routine is essentially a wrapper for the routine below
!     It calculates the size of constants such as array sizes.
!
      implicit none
!
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!     Declarations
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!
!     variables from function-header
!
      logical lgenev
      integer n,m
      real*8 lambda(m)
      integer ldpsi
      complex*C_PRECISION psi(ldpsi,m)
      integer nb, nmax, maxiter,ndiapb
      real*8 tolerance
      integer nout
      real*8 timer(*)
      integer info
#include H_TIMES_B_DECL
#ifdef PARAL_INCL
#include PARAL_DECL      
#endif
!
!     local variables
!
      integer ns, nbl, maxiterl, nmaxl
!
!
      if (lgenev) then
         ns=n
      else
         ns=1
      end if
!
      nbl=nb
      maxiterl=maxiter
      nmaxl   =m+ndiapb*m
#ifdef PARAL
!     hardwire the blocksize for a start
!     if (nbl.lt.par_pw_np) nbl=par_pw_np
      nbl=par_pw_np
#endif PARAL
      if ((nmaxl.le.0).or.(maxiterl.le.0)) nmaxl=m+8*nbl
      if (maxiterl.le.0) maxiterl=2
      if (nbl.le.128) nbl=128
      if (nbl.gt.m) nbl=m
      if (nmaxl.lt.m+nbl) nmaxl=m+nbl
      nmaxl = 2*m
!
      call timer_start(timer,TDAVIDSON)
!
!     Call the core solver
!
      call eigsolvecore(lgenev, n, m, lambda, psi, ldpsi,&
           nbl, maxiterl, nmaxl, tolerance,&
#include H_TIMES_B_ARGS
#ifdef PARAL_INCL
           ,&
#include PARAL_ARGS
#endif
           , nout, timer, info, ns)
!
      call timer_stop(timer,TDAVIDSON)
!
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine eigsolvecore(lgenev, n, m, lambda, psi, ldpsi,&
           nb, maxiter, nmax, tolerance,&
#include H_TIMES_B_ARGS
#ifdef PARAL_INCL
           ,&
#include PARAL_ARGS
#endif
           , nout, timer, info, ns)
!
!     lgenev       TRUE if generalized eigenvalue problem (input)
!     n            dimension of problem (input)
!     m            number of eigenpair requested (input)
!     lambda       sorted array of eigenvalues (output)
!     psi          array of inital/final eigenvectors (input/output)
!     ldpsi        leading dimension of psi which mighe be > n (input)
!     nb           number of eigenpairs to iterate on simultaneously (input)
!     maxiter      max number of iterations of each eigenpair (input)
!     nmax         max dimension of subspace (input)
!     tolerance    tolerance for when to consider eigenpairs converged (input)
!     nout         device write output to (input)
!     timer        array of timers (input/output)
!     info         returns non zero if algorithm failed.
!     ns           n if lgenev is true 1 otherwise
!
#ifdef PARAL
      use par_functions_module
#endif PARAL
      implicit none
!
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!     Declarations
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!
!     variables from function-header
!
      logical lgenev
      integer n,m
      real*8 lambda(m)
      integer ldpsi
      complex*C_PRECISION psi(ldpsi,m)
      integer nb, maxiter, nmax
      real*8 tolerance
      integer nout
      real*8 timer(*)
      integer info
#include H_TIMES_B_DECL
#ifdef PARAL_INCL
#include PARAL_DECL      
#endif
!     Size declarations from wrapper function
      integer ns
!     BLAS/LAPACK include file
#ifdef INCLUDEBLAS
#     include "blas.h"
#else
      real*4 SCNRM2
      real*8 DZNRM2
      external DZNRM2, SCNRM2
#endif
!
!     variables in large subspace
!
      complex*C_PRECISION psi2(ldpsi,nmax-m)
      complex*C_PRECISION hpsi(ldpsi,nmax)
      complex*C_PRECISION spsi(ldpsi,nmax)
      complex*C_PRECISION r(ldpsi,nb)
      complex*C_PRECISION eigvec(ldpsi,nb)
      complex*C_PRECISION seigvec(ldpsi,nb)
!
!     Variables for parallel version
!
#ifdef PARAL
      complex*C_PRECISION par_work(n_global,4)
      real*8 dtemp,dtempa(1)
      integer irank,dummyndx(1)
      logical exists
#else
      integer n_global    ! equal to nplwkp
#endif
!
!     Arrays in the sub-space:
!     We MUST use double precision in order to ensure accurate answers.
      complex*16 hhat(nmax,nmax),shat(nmax,nmax)
!
!     Misc. local variables
!
      integer i,j,k,l
      complex*C_PRECISION ctemp
      real*R_PRECISION stemp, nrm
      complex*16 ztemp
      complex*16 v(nmax,m)
      integer niter(m)
      integer rndx(nb)
      logical restarted
!
!     Misc. constants
!
      complex*16 zone, zzero
      logical init
      data zone, zzero,init /(1.0d0,0.0d0), (0.0d0,0.0d0),.true./
      save init
!     
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!     Initialization
!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
!
#ifdef PARAL
!     Get the rank of this processor
      call par_rank_pw(irank,&
#include PARAL_ARGS
        ,nout   )
#else
      n_global = n 
#endif

      if (init) then 
      write(nout,*) &
           'EIGSLV: Eigsolve was called with the following parameter:'
      write(nout,*) 'EIGSLV:     lgenev:         ',lgenev
      write(nout,*) 'EIGSLV:     n:              ',n
      write(nout,*) 'EIGSLV:     m:              ',m
      write(nout,*) 'EIGSLV:     ldpsi:          ',ldpsi
      write(nout,*) 'EIGSLV:     blocksize:      ',nb
      write(nout,*) 'EIGSLV:     maxiter:        ',maxiter
      write(nout,*) 'EIGSLV:     max subsp. dim: ',nmax
      write(nout,*) 'EIGSLV:     tolerance:      ',tolerance
#ifdef PARAL
      write(nout,*) 'EIGSLV:     par_proc.:      ',par_process
      write(nout,*) 'EIGSLV:     par_pw_np:      ',par_pw_np
      write(nout,*) 'EIGSLV:     N_global:       ',N_global
#endif

      init = .false.
      endif
!
!     Initialize misc. variables
!
      restarted=.false.
      do i=1, m
         niter(i)=0
      end do
!
      call timer_start(timer,TBLAS)
      do i=1,m
#ifdef SERIAL
         stemp=1.d0/NORM2(n,psi(1,i),1)
#else
         dtempa(1) = NORM2(n,psi(1,i),1)**2
         call par_sum_double('N',dtempa,dtempa,1,1,1,&
#include PARAL_ARGS
              ,timer)
         dtemp = dtempa(1) 
         stemp=1.0/sqrt(dtemp)
#endif
         call BLAS_RSCAL(n,stemp,psi(1,i),1)
      end do                      
      call timer_stop(timer,TBLAS)
!
!     Generate initial S and H products
      call H_TIMES_B (psi, hpsi,spsi,n_global, m,1,.false.,&
#include      H_TIMES_B_ARGS
              , timer)

!     Form initial projection
      call timer_start(timer,TBLAS)
      call timer_start(timer,TSPROD)
!     Form BHB
!     NOTICE: symmetry might be exploited here.
      call CCZ_GEMM('C','N',m,m,n,zone,psi,ldpsi,&
                     hpsi,ldpsi,zzero,hhat,nmax)
!     Form BSB (allways as nonorthogonal basis is used)
!     NOTICE: symmetry might be exploited here.
      if (lgenev) then
         call CCZ_GEMM('C','N',m,m,n,zone,psi,ldpsi,&
              spsi,ldpsi,zzero,shat,nmax)
      else
         call CCZ_GEMM('C','N',m,m,n,zone,psi,ldpsi,&
              psi,ldpsi,zzero,shat,nmax)
      end if
      call timer_stop(timer,TSPROD)
      call timer_stop(timer,TBLAS)
#ifdef PARAL
       call par_sum_complex('N',hhat,hhat,nmax,m,m,&
#include PARAL_ARGS
             ,timer)
      call par_sum_complex('N',shat,shat,nmax,m,m,&
#include PARAL_ARGS
             ,timer)
#endif
!     Solve eigenvalue problem in projected space.
      call eigslv(m,hhat,nmax,shat,nmax,m,lambda,v,nmax,nout,timer,info)
      if (info.ne.0) then
         write (nout,*) 'EIGSLV: ERROR: eigslv returned info=', info
         return
      end if
!
      j=m
 100  l=1
      k=0
!     while l<=m and k<nb
 200  if (niter(l).lt.maxiter) then
!     calculate residual
         call timer_start(timer,TBLAS)
         call timer_start(timer,TRESI)
         if (.not.restarted) then
            call CZC_GEMV('N',n,j,zone,hpsi,ldpsi,&
                    v(1,l),1,zzero,r(1,k+1),1)
            ztemp=-lambda(l)
            if (lgenev) then
               call CZC_GEMV('N',n,j,ztemp,spsi,ldpsi,&
                    v(1,l),1,zone,r(1,k+1),1)
            else
               call CZC_GEMV('N',n,j,ztemp,psi,ldpsi,&
                    v(1,l),1,zone,r(1,k+1),1)
            end if
         else
            call BLAS_COPY (n, hpsi(1,l), 1, r(1,k+1), 1)
            ctemp=-lambda(l)
            if (lgenev) then
               call BLAS_AXPY (n, ctemp, spsi(1,l), 1, r(1,k+1),1)
            else
               call BLAS_AXPY (n, ctemp, psi(1,l), 1, r(1,k+1), 1)
            end if
         end if
         call timer_stop(timer,TRESI)
         nrm=NORM2(n,r(1,k+1),1)
         call timer_stop(timer,TBLAS)
#ifdef PARAL
         dtempa(1)=nrm*nrm
         call par_sum_double('N',dtempa,dtempa,1,1,1,&
#include PARAL_ARGS
              ,timer)
         dtemp = dtempa(1)
         nrm=sqrt(dtemp)
#endif
#ifdef DEBUG
         write(nout,*) 'EIGSLV: Eig no. ',l,', ',lambda(l),&
                    ' has residual ',nrm
#endif
         if (nrm.gt.tolerance) then
            k=k+1
            rndx(k)=l
            niter(l)=niter(l)+1
            call timer_start(timer,TBLAS)
!     As preconditioning algorithm needs eigenvectors etc. form those
            if (.not.restarted) then
               call CZC_GEMV('N',n,m,zone,psi,ldpsi,&
                    v(1,l),1,zzero,eigvec(1,k),1)
               if ((j-m)>0) &
                 call CZC_GEMV('N',n,j-m,zone,psi2,ldpsi,&
                    v(m+1,l),1,zone,eigvec(1,k),1)
               if (lgenev) then
                  call CZC_GEMV('N',n,j,ztemp,spsi,ldpsi,&
                       v(1,l),1,zzero,seigvec(1,k),1)
               end if
            else
               call BLAS_COPY (n, psi(1,l), 1, eigvec(1,k), 1)
               if (lgenev) then
                  call BLAS_COPY (n, spsi(1,l), 1, seigvec(1,k), 1)
               end if
            end if
            call timer_stop(timer,TBLAS)
         else
            niter(l)=maxiter
         end if
      end if
      l=l+1
      if ((l.le.m).and.(k.lt.nb)) goto 200
!     If all eigenpairs have been processed then exit.
      if (k.eq.0) goto 300
!
      do i=1,k
!        Precondition the residuals
         call precondition(lgenev,n_global,eigvec(1,i),ldpsi,&
              seigvec(1,i),ldpsi,lambda(rndx(i)),&
              psi2(1,j+i-m),ldpsi,r(1,i),ldpsi,&
#             include "apply_h_args.h"
              ,timer,nout,info)
         if (info.ne.0) then
            write (nout,*) &
                 'EIGSLV: ERROR: precondition returned info=', info
            return
         end if
      enddo   ! k

!     Form H and S products
      call H_TIMES_B (psi2(1,j+1-m), hpsi(1,j+1),&
                    spsi(1,j+1), n_global, k,1,.false.,&
#include               H_TIMES_B_ARGS
              , timer)

!     Update projection (only lower triangular part)
!
      call timer_start(timer,TBLAS)
      call CCZ_GEMM('C','N',k,m,n,zone,hpsi(1,j+1),&
           ldpsi,psi,ldpsi,zzero,hhat(j+1,1),nmax)
      call CCZ_GEMM('C','N',k,j+k-m,n,zone,hpsi(1,j+1),&
           ldpsi,psi2,ldpsi,zzero,hhat(j+1,m+1),nmax)
      if (lgenev) then
         call CCZ_GEMM('C','N',k,m,n,zone,spsi(1,j+1),&
              ldpsi,psi,ldpsi,zzero,shat(j+1,1),nmax)
         call CCZ_GEMM('C','N',k,j+k-m,n,zone,spsi(1,j+1),&
              ldpsi,psi2,ldpsi,zzero,shat(j+1,m+1),nmax)
      else
         call CCZ_GEMM('C','N',k,m,n,zone,psi2(1,j+1-m),&
              ldpsi,psi,ldpsi,zzero,shat(j+1,1),nmax)
         call CCZ_GEMM('C','N',k,j+k-m,n,zone,psi2(1,j+1-m),&
              ldpsi,psi2,ldpsi,zzero,shat(j+1,m+1),nmax)
      end if
      call timer_stop(timer,TBLAS)
#ifdef PARAL
      call par_sum_complex('N',hhat(j+1,1),hhat(j+1,1),nmax,k,j+k,&
#include PARAL_ARGS
           ,timer)
      call par_sum_complex('N',shat(j+1,1),shat(j+1,1),nmax,k,j+k,&
#include PARAL_ARGS
           ,timer)
#endif
!
!     Solve eigenvalue problem in projected space.
      call eigslv(j+k,hhat,nmax,shat,nmax,m,lambda,v,nmax,nout,timer,&
           info)
      if (info.ne.0) then
         write (nout,*) 'EIGSLV: ERROR: eigslv returned info=', info
         return
      end if
!
      j=j+k
!
!     Check for restarting
!
      if (j+nb.gt.nmax) then
!     Collapse the subspace
         call timer_start(timer,TBLAS)
         call timer_start(timer,TRESTART)
#ifdef DEBUG
         write(nout,*) 'EIGSLV: Restarting'
#endif
         call vecrotate(psi,ldpsi,n,m,m,v,nmax,timer)
         call CZC_GEMM('N','N',n,m,j-m,zone,psi2,ldpsi,v(m+1,1),nmax,&
              zone,psi,ldpsi)
         call vecrotate(hpsi,ldpsi,n,m,j,v,nmax,timer)
         call vecrotate(spsi,ldpsi,n,m,j,v,nmax,timer)
         call simtrans(j,m,hhat,nmax,v,nmax)
         call simtrans(j,m,shat,nmax,v,nmax)
         call timer_stop(timer,TRESTART)
         call timer_stop(timer,TBLAS)
         j=m
         restarted=.true.
      else
         restarted=.false.
      end if
      goto 100
 300  continue
#ifdef DEBUG
      write(nout,*) 'EIGSLV: Forming final eigenvectors'
#endif
      call uflush(nout)
      if (.not.restarted) then
         call timer_start(timer,TBLAS)
         call vecrotate(psi,ldpsi,n,m,m,v,nmax,timer)
         if ((j-m).gt.0) then 
         call CZC_GEMM('N','N',n,m,j-m,zone,psi2,ldpsi,v(m+1,1),nmax,&
              zone,psi,ldpsi)
         endif
         call timer_stop(timer,TBLAS)
      end if
        
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine precondition(lgenev,n,psi,ldpsi,spsi,ldspsi,&
           lambda,x,ldx,residual,ldr,&
#       include "apply_h_args.h"
           ,timer,nout,info)
!
!     Solve approximately (H-S*lambda)*x = -r ("preconditioning")
!
      implicit none
!
!     lgenev    true if generalized eigenvalue problem (S=I otherwise)
!     n         length of eigenvectors
!     psi       eigenvectors corresponding to lambda
!     ldpsi     leading dimension if psi
!     spsi      S*psi
!     ldspsi    leading dimension if spsi
!     lambda    eigenvalues
!     x         preconditioned result
!     ldx       leading dimension if x
!     residual  residuals to precondition
!     ldr       leading dimension if residual
!     timer     array of timers
!     nout      device for output
!     info      return code, 0 upon success
      logical lgenev
      integer n,ldpsi,ldspsi,ldx,ldr
      complex*C_PRECISION psi(ldpsi)
      complex*C_PRECISION spsi(ldspsi)
      real*8 lambda
      complex*C_PRECISION x(ldx)
      complex*C_PRECISION residual(ldx)
      real*R_PRECISION llambda
#     include "apply_h_decl.h"
      real*8 timer(*)
      integer nout,info
!
      call timer_start(timer,TUPDWF)
!
!     Special function:
!     Calculate band kinetic energy of eigenvector for precon: (wkintmp)
      call pre_precon (n, psi,  lgenev,&
#       include "apply_h_args.h"
        , timer)               

!     "Precondition" the residual vector
!     initially get precision right
      llambda=lambda
      call precon (n, x, residual, llambda,&
#include      H_TIMES_B_ARGS
           , timer, info)
!
      if (info.ne.0) then
         write (nout,*) 'EIGSLV: ERROR: precon returned info=', info
         call timer_stop(timer,TUPDWF)
         return
      end if

      info=0
      call timer_stop(timer,TUPDWF)
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine eigslv(n,a,lda,b,ldb,k,lambda,psi,ldpsi,nout,timer,&
           info)
!
!     Solves the symmetric eigenvalue problem A*psi=B*psi*diag(lambda)
!     where B is positive definite.
!     It is assumed that n is small.
!
      implicit none
!
!i    n         dimension of problem
!io   A         matrix, only lower part accessed/updated
!i    lda       leading dimension of A
!io   B         matrix, only lower part accessed/updated
!i    ldb       leading dimension of B
!i    k         number of eigenvalues/vectors for output
!o    lambda    eigenvalues
!o    psi       eigenvectors, normalized to norm 1
!i    ldpsi     leading dimension of psi
!i    nout      device for output
!io   timer     array of timers
!o    info      return 0 upon success
!
!     Argument declarations
      integer n,lda,ldb,k,ldpsi,nout,info
      complex*16 a(lda,n),b(ldb,n),psi(ldpsi,k)
      real*8 lambda(k)
!
!     Local variables
      integer i
      complex*16 v(n,n),w(n,n),w1(n*n),work(8*n)
      real*8 timer(*)
      real*8 rwork(7*n),lmb(n)
#ifdef USEZHEEVX
      integer iwork(5*n),ifail(n)
#endif
#ifdef ESSL
      integer l,j
      complex*16 z(n,n)
#endif
!
!     Misc. constants
!
      complex*16 zone
      data zone /(1.0d0,0.0d0)/

      call timer_start(timer,TEIGPROJ)
!
!     Copy a, b to w, v as LAPACK modifes matrices
      do i=1,n
         call ZCOPY(n,a(1,i),1,w(1,i),1)
         call ZCOPY(n,b(1,i),1,v(1,i),1)
      end do
!
!     Do cholesky of V
      call ZPOTRF('L',n,v,n,info)
      if (info.ne.0) then
         write (nout,*) &
              'EIGSLV: ERROR: ZPOTRF returned info=', info
         if (info>0) then 
!           attempt too correct for v not being positive definit, 
!           this need too be implemented more correct/robust.
            do i=1,n
               call ZCOPY(n,b(1,i),1,v(1,i),1)
               if (dreal(v(i,i))<0) v(i,i) = 0.0001d0
            enddo
            v(info,info) = v(info,info) + 0.001d0
            call ZPOTRF('L',n,v,n,info)
            if (info.ne.0) then
              write (nout,*) &
               'EIGSLV: ERROR: ZPOTRF(2) returned info=',info
              goto 500
            endif
         else
            goto 500
         endif
      endif
!     Tranforms eigenvalue problem: w=inv(L)*w*inv(L'), V=L*L'
      call ZHEGST(1,'L',n,w,n,v,n,info)
      if (info.ne.0) then
         write (nout,*) &
              'EIGSLV: ERROR: ZHEGST returned info=', info
         goto 500
      end if
!     Solve standard eigenvalue problem
      call timer_start(timer,THEEV)
#ifdef USEZHEEVX
      call ZHEEVX('V','I','L',n,w,n,0.d0,0.d0,1,k,0.d0,i,lmb,&
           psi,ldpsi,work,8*n,rwork,iwork,ifail,info)
      if (info.ne.0) then
         write (nout,*) &
              'EIGSLV: ERROR: ZHEEVX returned info=', info
         goto 500
      end if
!
      do i=1,k
         lambda(i)=lmb(i)
      end do
#else
#ifndef ESSL
      call ZHEEV('V','L',n,w,n,lmb,&
           work,8*n,rwork,info)
      if (info.ne.0) then
         write (nout,*) &
              'EIGSLV: ERROR: ZHEEV returned info=', info
         goto 500
      end if
!
      do i=1,k
         lambda(i)=lmb(i)
         call ZCOPY(n,w(1,i),1,psi(1,i),1)
      end do
#else
!     Pack coefficient matrix
#ifdef DEBUG
      write (nout,*) 'EIGSLV: Using zhpev (ESSL)'
#endif DEBUG
      l=0
      do j=1,n
         do i=j,n
            l=l+1
            w1(l)=w(i,j)
         end do
      end do
      call zhpev(1,w1,lmb,z,n,n,work,8*n)
!
      do i=1,k
         lambda(i)=lmb(i)
         call ZCOPY(n,z(1,i),1,psi(1,i),1)
      end do
#endif
#endif
      call timer_stop(timer,THEEV)
!     Recover eigenvectors of generalized eigenvalue problem
      call ZTRSM('L','L','C','N',n,k,zone,v,n,psi,ldpsi)
!
 500  call timer_stop(timer,TEIGPROJ)
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine vecrotate (b, ldb, n, nsub, nold, u, ldu, timer)
!
!     Rotate vectors  b(1:n,1:nsub) = b(1:n,1:nold)*u(1:nold,1:nsub)
!     Use blocking in b, in order to employ a minimal working space,
!     which hopefully can reside in cache.
!     The working space is necessary because array b is overwritten
!     on output.
!
      implicit none
      integer ldb, n, nsub, nold, ldu 
!     Arrays in the "large" space:
      complex*C_PRECISION b(ldb,nold)
!     Arrays in the sub-space:
      complex*16 u(ldu,nsub)
      real*8 timer(*)
!
!     Local variables
      integer BLOCKSIZE, niter, nn1, nn, i, j
!     Make sure that work(BLOCKSIZE,nsub) fits inside cache:
      parameter (BLOCKSIZE = 128)
      complex*C_PRECISION work(BLOCKSIZE,nsub)
      complex*16 zone, zzero
      data zone, zzero /(1.0d0,0.0d0), (0.0d0,0.0d0)/
!
      call timer_start(timer,TVROT)
!
!     Number of times to iterate over a block
      niter = n / BLOCKSIZE
      if (niter * BLOCKSIZE .lt. n) niter = niter + 1

      nn1 = 1
      nn = BLOCKSIZE
      do i = 1, niter
        nn = min (BLOCKSIZE, n - nn1 + 1)
!       b(1:n,1:nsub) = b(1:n,1:nold)*u(1:nold,1:nsub)
        call CZC_GEMM ('N', 'N', nn, nsub, nold, zone, b(nn1,1), ldb,&
          u, ldu, zzero, work, BLOCKSIZE)
!       Copy back work(:,1:nsub) into b(:,1:nsub)
        do j = 1, nsub
          call BLAS_COPY (nn, work(1,j), 1, b(nn1,j), 1)
        end do
        nn1 = nn1 + BLOCKSIZE
      end do
!
      call timer_stop(timer,TVROT)
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine simtrans(n,m,x,ldx,u,ldu)
!
!     Calculates x(1:m,1:m)=u(1:n,1:m)'*x(1:n,1:n)*u(1:n,1:m)
!     where n>m and x is hermitian with only lower triag part available
!
      implicit none
      integer n,m,ldx,ldu
      complex*16 x(ldx,n),u(ldu,m)
!
      complex*16 w(n,m)
!
      complex*16 zone, zzero
      data zone, zzero /(1.0d0,0.0d0), (0.0d0,0.0d0)/
!
      call zhemm('L','L',n,m,zone,x,ldx,u,ldu,zzero,w,n)
      call zgemm('C','N',m,m,n,zone,u,ldu,w,n,zzero,x,ldx)
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine timer_start(timer,idx)
      implicit none
      real*8 timer(*)
      integer idx
#include "etime.h"
      call uttime(time)
      timer(idx) = timer(idx) - time(1)
      return
      end
!
!-------------------------------------------------------------------
!
      subroutine timer_stop(timer,idx)
      implicit none
      real*8 timer(*)
      integer idx
#include "etime.h"
      call uttime(time)
      timer(idx) = timer(idx) + time(1)
      return
      end
