!----- AGPL -------------------------------------------------------------------- ! ! Copyright (C) Stichting Deltares, 2015. ! ! This file is part of Delft3D (D-Flow Flexible Mesh component). ! ! Delft3D is free software: you can redistribute it and/or modify ! it under the terms of the GNU Affero General Public License as ! published by the Free Software Foundation version 3. ! ! Delft3D is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU Affero General Public License for more details. ! ! You should have received a copy of the GNU Affero General Public License ! along with Delft3D. If not, see . ! ! contact: delft3d.support@deltares.nl ! Stichting Deltares ! P.O. Box 177 ! 2600 MH Delft, The Netherlands ! ! All indications and logos of, and references to, "Delft3D", ! "D-Flow Flexible Mesh" and "Deltares" are registered trademarks of Stichting ! Deltares, and remain the property of Stichting Deltares. All rights reserved. ! !------------------------------------------------------------------------------- ! $Id: solve_guus.F90 43424 2015-12-04 17:30:45Z kernkam $ ! $HeadURL: https://repos.deltares.nl/repos/ds/trunk/additional/unstruc/src/solve_guus.F90 $ #ifdef HAVE_CONFIG_H #include "config.h" #endif subroutine inireduce() use m_reduce use m_flowgeom use m_partitioninfo use m_flowparameters, only: icgsolver, ipre use m_alloc ! subroutine to intialise the following variables: ! noactive ! nbrstk implicit none integer m,n,ierr, nsiz integer idmn, L, k1, k2, k lintot = lnx ; nodtot = ndx allocate ( ij(nodtot) ,stat=ierr) call aerr('ij(nodtot)',ierr, nodtot) allocate ( nodbr2(nodtot) ,stat=ierr) ; nodbr2 = 0 call aerr('nodbr2(nodtot)',ierr,nodtot) allocate ( nbrstk(nodtot) ,stat=ierr) ; nbrstk = 0 call aerr('nbrstk(nodtot)',ierr,nodtot) allocate ( nodstk(nodtot) ,stat=ierr) ; nodstk = 0 call aerr('nodstk(nodtot)',ierr,nodtot) if (allocated (noel) ) deallocate(noel) allocate ( noel(nodtot) , stat=ierr) ; noel = 0 call aerr('noel(nodtot)', ierr,nodtot) if (allocated (noel0) ) deallocate(noel0) allocate ( noel0(nodtot) , stat=ierr) ; noel0 = 0 call aerr('noel0(nodtot)', ierr,nodtot) if (allocated( ia)) deallocate(ia) allocate ( ia(nodtot) ,stat=ierr) call aerr('ia(nodtot)',ierr, nodtot) if (allocated( row)) deallocate(row) allocate ( row(nodtot) ,stat=ierr) call aerr('row(nodtot)',ierr, nodtot) if (allocated( lv2) ) deallocate (lv2) allocate ( lv2(lintot) ,stat=ierr) ; lv2 = 0 call aerr('lv2(lintot)',ierr,lintot) if (allocated( jagauss)) deallocate(jagauss) allocate ( jagauss(nodtot), stat=ierr); jagauss = 1 call aerr('jagauss(nodtot)',ierr,nodtot) ! check solver applicability if ( jampi.eq.0 ) then if ( icgsolver.gt.4 .and. icgsolver.ne.6 .and. icgsolver.ne.44 .and. icgsolver.ne.8 ) then write(6,*) 'icgsolver=', icgsolver call qnerror('inireduce: inappropiate Krylov solver', ' ', ' ') icgsolver = 1 end if else if ( icgsolver.le.4 .or. icgsolver.eq.44 ) then write(6,*) 'icgsolver=', icgsolver call qnerror('inireduce: inappropiate Krylov solver', ' ', ' ') end if end if ! set preconditioner if ( icgsolver.eq.7 ) then ipre = 4 else if ( icgsolver.eq.88 ) then ! DEBUG icgsolver = 7 ! DEBUG ipre = 0 ! DEBUG else ipre = 0 end if if ( jampi.eq.1 ) then ! do not eliminate ghost- and sendnodes do idmn=0,ndomains-1 do m=1,nghostlist_s(ndomains-1) ndn=ighostlist_s(m) if ( ndn.gt.nodtot) then call qnerror('ighost: ndn > nodtot', ' ', ' ') end if jagauss(ndn) = 0 end do do m=1,numsend_s ndn=isendlist_s(m) if ( ndn.gt.nodtot) then call qnerror('isend: ndn > nodtot', ' ', ' ') end if jagauss(ndn) = 0 end do end do end if ! if (maxdge > 0) then ! ! do L = Lnxi+1, lnx ! k2 = ln(2,L) ! jagauss(k2) = 0 ! no gauss on open bnd ! enddo ! ! endif do k = 1,ndx if (nd(k)%lnx == 0) then ! isolated points, may be caused by thindams (dry points in d3d etc) jagauss(k) = 0 endif enddo ij%l=0 do nbr=1,lintot ! m=linev(nbr,1) m = nbr nodl=ln(1,nbr) nodr=ln(2,nbr) if (m.gt.0) call ijtrue(nodl,nodr) if (nodl.ne.0) nbrstk(nodl)=nodl if (nodr.ne.0) nbrstk(nodr)=nodr enddo noactive=0 do ndn=1,nodtot if (nbrstk(ndn)==ndn) then noactive=noactive+1 endif enddo end subroutine inireduce subroutine pointonstack use m_reduce use m_alloc ! ! a point to be eliminated is found and placed on the elimination stack ! implicit none integer i,j,n,m,ierr nogauss=nogauss+1 noel(nogauss)=ndn nbrstk(ndn)=0 row(ndn)%l=nodbr2(ndn) ! GD: memory leak? allocate ( row(ndn)%j(nodbr2(ndn)),row(ndn)%a(nodbr2(ndn)),stat=ierr) call aerr('row(ndn)%j(nodbr2(ndn)),row(ndn)%a(nodbr2(ndn))',ierr,nodbr2(ndn)) m=0 do n=1,ij(ndn)%l if (ij(ndn)%b(n)) then m=m+1 row(ndn)%j(m)=ij(ndn)%j(n) endif enddo do n=1,nodbr2(ndn) j=row(ndn)%j(n) call ijfalse(ndn,j) enddo do m=1,nodbr2(ndn) i=row(ndn)%j(m) do n=m+1,nodbr2(ndn) j=row(ndn)%j(n) call ijtrue(i,j) enddo enddo do n=1,nodbr2(ndn) j=row(ndn)%j(n) if (nodbr2(j) Computes minimum degree and fills stack with nodes of the minimum degree ! subroutine mindegree use m_reduce use m_partitioninfo implicit none integer n mindgr=nodtot+10 do n=1,nodtot if (nbrstk(n)/=0 .and. jagauss(n).eq.1 ) then ndn=nbrstk(n) if (nodbr2(ndn) make the matrix and packed matrix array administration !> ia(i)%l : number of non-zero lower-diagonal entries for row i (Matrix is assumed symmetrical) !> ia(i)%j : array with the non-zero lower-diagonal column indices !> ia(i)%a : for row i, array with pointers to the packed matrix array data for matrix elements !> !> A(i,j) = Apacked(idx) !> where !> j = ia(i)%j(k), !> idx = ia(i)%a(k), for 1<=k<=ia(i)%l !> !> lv2(L) : pointer to the lower-diagonal off-diagonal entry of link L in the packed matrix array subroutine cijadres use m_reduce use m_flowgeom use m_alloc implicit none integer j,n,iadres,jj,iad2, ierr integer, external :: ijadr iadres=0 do n=1,nodtot ia(n)%l=0 do jj=1,ij(n)%l j=ij(n)%j(jj) if (j0) then iad2=0 ! GD: memory leak ! if(allocated(ia(n)%j)) deallocate(ia(n)%j) ! if(allocated(ia(n)%a)) deallocate(ia(n)%a) allocate ( ia(n)%j(ia(n)%l),ia(n)%a(ia(n)%l),stat=ierr) call aerr('ia(n)%j(ia(n)%l),ia(n)%a(ia(n)%l)',ierr,2*ia(n)%l) do jj=1,ij(n)%l j=ij(n)%j(jj) if (j0) lv2(n)=ijadr(nodl,nodr) enddo ijstck=iadres end subroutine cijadres !> get the packed matrix array index for matrix element (i,j) !> !> A(i,j) = Apacked(ijadr(i,j)) integer function ijadr(i,j) use m_reduce implicit none integer i,j,k,m,n m=max(i,j) n=min(i,j) do k=1,ia(m)%l if (ia(m)%j(k)==n) then ijadr=ia(m)%a(k) return endif enddo ijadr=0 return end function ijadr subroutine inigauss ! ! This subroutine initialises the adresses of the fill in coefficients during Gauss elimination ! use m_reduce use m_alloc implicit none integer i,j,m,n,ijij,nn,ierr integer, external :: ijadr do n=1,nogauss ndn=noel(n) ijij=0 do m=1,nodbr2(ndn) i=row(ndn)%j(m) do nn=m+1,nodbr2(ndn) j=row(ndn)%j(nn) ijij=ijij+1 enddo enddo allocate(row(ndn)%f(ijij),stat=ierr) call aerr('row(ndn)%f(ijij)',ierr,ijij) ijij=0 do m=1,nodbr2(ndn) i=row(ndn)%j(m) row(ndn)%a(m)=ijadr(ndn,i) do nn=m+1,nodbr2(ndn) j=row(ndn)%j(nn) ijij=ijij+1 row(ndn)%f(ijij)=ijadr(i,j) enddo enddo enddo ! determine ccc array size npmax = 0 do n=1,nogauss ndn=noel(n) npmax=max(npmax,nodbr2(ndn)) end do return end subroutine inigauss subroutine inicg use m_reduce use m_alloc implicit none integer i,j,n,m, ierr integer, external :: ijadr nocg=0 do i=1,nodtot ndn=nbrstk(i) if (ndn>0) then allocate (row(ndn)%j(nodbr2(ndn)),row(ndn)%a(nodbr2(ndn)),stat=ierr) call aerr('inicg',ierr,2*nodbr2(ndn)) row(ndn)%l=nodbr2(ndn) nocg=nocg+1;noel(nogauss+nocg)=ndn; m=0 do n=1,ij(ndn)%l if (ij(ndn)%b(n)) then m=m+1 row(ndn)%j(m)=ij(ndn)%j(n) endif enddo do j=1,row(ndn)%l row(ndn)%a(j)=ijadr(ndn,row(ndn)%j(j)) enddo endif enddo end subroutine inicg subroutine solve_matrix(s1,ndx,itsol) use m_flowparameters use m_reduce use m_flowtimes use m_flowgeom, only: ln, xz, yz use m_partitioninfo, only: my_rank, ndomains use m_timer #ifdef HAVE_PETSC use m_petsc #endif implicit none integer :: ndx double precision :: s1(ndx) integer :: itsol integer :: ierror ! error (1) or not (0) character(len=11) :: fmt integer :: l,m1m2,i,jj,k,fout,irowstart,bla ! integer :: ipre !< Preconditioner, 0=rowscaling, 1=GS, 2=trial integer :: ierr integer, dimension(:), allocatable :: node_numbers integer :: noel_start ierror = 0 if ( jatimer.eq.1 ) call starttimer(ITOTALSOL) call klok(cpusol(1)) if ( jatimer.eq.1 ) call starttimer(IGAUSSEL) call gauss_elimination ( ) if ( jatimer.eq.1 ) call stoptimer(IGAUSSEL) if ( jatimer.eq.1 ) call starttimer(ICG) if (icgsolver == 1) then call conjugategradient_omp (s1,ndx,ipre) ! ipre = 0,1 ! faster, solution depends on thread sequence else if (icgsolver == 2) then call conjugategradient_omp_threadsafe (s1,ndx,ipre) ! ipre = 0,1 ! slower, solution always identical else if (icgsolver == 3) then call conjugategradient (s1,ndx,ipre) ! ipre = 0,1,2 ! no omp else if (icgsolver == 4 .or. icgsolver == 44) then if ( nocg.gt.10) then if (icgsolver == 4) then ! thread-safe call conjugategradientSAAD(ddr,s1,ndx,nocgiter,1,1,ierror) ! Saad, always using omp and ILUD preconditioner else call conjugategradientSAAD(ddr,s1,ndx,nocgiter,1,0,ierror) ! Saad, always using omp and ILUD preconditioner end if if ( ierror.eq.1 ) goto 1234 else ! do *not* use Saad solver when nocg < 3 call conjugategradient (s1,ndx,ipre) ! ipre = 0,1,2 ! no omp end if else if (icgsolver == 5) then ! call conjugategradientSAAD_global(s1,ndx,nocgiter) else if (icgsolver == 6 ) then #ifdef HAVE_PETSC call conjugategradientPETSC(s1,ndx,nocgiter,1,ipre) ! 1:always compute preconditioner #else call qnerror('No PETSC solver available', ' ', ' ') #endif else if ( icgsolver == 7 ) then call conjugategradient_MPI(s1,ndx,ipre,nocgiter,ierror) ! parallel cg, ipre=0,1: "global" preconditioning, ipre=3,4: block preconditioning if ( ierror.eq.1 ) goto 1234 else if ( icgsolver == 8 ) then #ifdef HAVE_PARMS nocgiter = 999 call conjugategradient_parms(s1,ndx,nocgiter) #else call qnerror('No pARMS solver available', ' ', ' ') #endif else if ( icgsolver.eq.9 .or. icgsolver.gt.90 ) then call testsolver(Ndx, s1, nocgiter, ierror) else call qnerror('no valid solver', ' ', ' ') endif ierror = 0 ! error handling 1234 continue if ( jatimer.eq.1 ) then call stoptimer(ICG) numcgits = numcgits + nocgiter end if if ( jatimer.eq.1 ) call starttimer(IGAUSSSU) call gauss_substitution(s1,ndx) if ( jatimer.eq.1 ) call stoptimer(IGAUSSSU) if ( jatimer.eq.1 ) call stoptimer(ITOTALSOL) call klok(cpusol(2)) ; cpusol(3) = cpusol(3) + cpusol(2) - cpusol(1) itsol = nocgiter if (ierror .ne. 0) then call writematrix_matlab() endif end subroutine solve_matrix subroutine conjugategradientSAAD(righthandside,s1,ndx,its,jaini,jadosafe,ierror) use m_reduce ! use unstruc_messages USE M_SAAD use m_flowgeom, only: kfs use MessageHandling implicit none integer :: ndx, ipre, its double precision :: s1(ndx) double precision, dimension(Ndx), intent(in) :: righthandside !< right-hand side, all cells integer, intent(in) :: jaini !< compute preconditioner and permutation (1) or not (0), or initialization only (-1), or ILU solve only (2) integer, intent(in) :: jadosafe !< thread-safe (1) or not (0), will set jasafe module variable integer, intent(out) :: ierror !< error (1) or not (0) integer :: j,jj,n,ntot, na, matr, nietnul double precision :: res character(len=100) :: message ! ddr (rechterlid), bbr (diag) , ccr (off diag), s1, row, row()%j, row()%a [AvD] ierror = 0 if (nocg<=0) return ! set jasafe module variable jasafe = jadosafe if ( jaini.eq.-1 .or. jaini.eq.1 ) then do n=nogauss+1,nogauss+nocg ndn = noel(n) ! guus index ntot=row(ndn)%l nietnul = 0 do j=1,ntot jj = row(ndn)%a(j) if (ccr(jj) .ne. 0d0) then i = row(ndn)%j(j) if ( kfs(i) == 1 ) then nietnul = nietnul + 1 endif endif enddo if (nietnul == 0 .and. jaini.eq.1 ) then ! for non-parallel solver only noel(n) = -noel(n) endif enddo ! make matrix L nn = 0 do n=nogauss+1,nogauss+nocg ndn = noel(n) ! guus index if (ndn > 0) then nn = nn + 1 ngs(ndn) = nn ! guus to saad endif enddo nn = 0 na = 0 ! saad matrix non zero counter iao(1) = 1 ! do n=nogauss+1,nogauss+nocg ndn = noel(n) ! guus index if (ndn > 0) then nn = nn + 1 ! n - nogauss ! saad index sol(nn) = s1 (ndn) ! rhs(nn) = ddr(ndn) ! input argument rhs(nn) = righthandside(ndn) na = na + 1 ! saad matrix non zero counter ao (na) = bbr(ndn) jao(na) = nn ! saad row ntot=row(ndn)%l do j=1,ntot jj = row(ndn)%a(j) if (ccr(jj) .ne. 0d0) then i = row(ndn)%j(j) if ( kfs(i).eq.1 ) then na = na + 1 ao (na) = ccr(jj) jao(na) = ngs(i) ! saad row else ! internal boundary: add to right-hand side rhs(nn) = rhs(nn) - ccr(jj)*s1(i) end if endif enddo iao(nn+1) = na + 1 ! saad column, nr of non zeros plus 1 endif enddo else ! always make rhs (eliminate ghostcells) do n=nogauss+1,nogauss+nocg nn = n - nogauss ! saad index ndn = noel(n) ! guus if (ndn > 0) then rhs(nn) = righthandside(ndn) ntot=row(ndn)%l do j=1,ntot i = row(ndn)%j(j) jj = row(ndn)%a(j) if (ccr(jj) .ne. 0d0) then if ( kfs(i).eq.1 ) then else ! internal boundary: add to right-hand side rhs(nn) = rhs(nn) - ccr(jj)*s1(i) end if endif enddo end if enddo na = iao(nn+1)-1 ! total number of non-zeros end if if ( nn.gt.0 ) then call cgsaad(its,na,nn,jaini,1,ierror,res) ! nocg ipv nn !write (6,*) res ! check error if ( ierror.ne.0 ) then ! all is not lost, check residual if ( res .le. 1d4*epscg ) then ! allow a larger error call mess(LEVEL_INFO, 'conjugategradientSAAD: non-fatal error') else call qnerror('conjugategradientSAAD: error', ' ', ' ') call mess(LEVEL_ERROR, 'conjugategradientSAAD: error') end if call newfil(matr, 'saadmatrix.m') nocg = nn write(matr,'(a)') 'data = [ ...' do i=1,nocg do j=iao(i),iao(i+1)-1 write(matr,'(2I10,E17.5)') i, jao(j), ao(j) end do end do write(matr,'(a)') '];' write(matr,'("A=sparse(data(:,1),data(:,2),data(:,3));")') write(matr,'(a)') 'b=[ ...' do i=1,nocg write(matr,'(E17.5)') rhs(i) end do write(matr,'(a)') '];' write(matr,'(a)') 'sol=[ ...' do i=1,nocg write(matr,'(E17.5)') sol(i) end do write(matr,'(a)') '];' call doclose(matr) else ! write(message,*) 'Solver converged in ', its,' iterations' ! call mess(LEVEL_INFO, message) end if else ! nothing to do end if if ( jaini.ne.-1 ) then nn = 0 do n = nogauss+1, nogauss+nocg ! nn = n - nogauss ! saad index ndn = noel(n) ! guus index if (ndn > 0) then nn = nn + 1 s1(ndn) = sol(nn) ! en hier zet men thee en over else ndn = -ndn noel(n) = ndn s1(ndn) = righthandside(ndn) / bbr(ndn) endif enddo end if end subroutine conjugategradientSAAD subroutine conjugategradient(s1,ndx,ipre) use m_reduce use unstruc_messages implicit none integer :: ndx, ipre double precision :: s1(ndx) integer :: i,j,jj,n,ntot, mout, japrint = 0 double precision :: rkzki,rkzki0,pkapki,alfak,betak double precision :: eps ! ddr (rechterlid), bbr (diag) , ccr (off diag), s1, row, row()%j, row()%a [AvD] if (nocg<=0) return nocgiter = 0 eps=0d0 rkzki=0d0 ! make matrix L if (ipre == 2) then ! make L do n=nogauss+1,nogauss+nocg ndn=noel(n) bbl(ndn)=bbr(ndn) ntot=row(ndn)%l do j=1,ntot i=row(ndn)%j(j) if (indn) then jj=row(ndn)%a(j) zkr(ndn)=zkr(ndn)-ccr(jj)*zkr(i) endif enddo if (ipre == 1) then zkr(ndn)=zkr(ndn)/bbr(ndn) ! Gs else zkr(ndn)=zkr(ndn)/bbl(ndn) ! Chol endif ! if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo endif if (eps.lt.epscg) then return endif nocgiter=nocgiter+1 if (nocgiter.gt.1000) then call mess(LEVEL_ERROR, ' no convergence for CG method, nr of iterations, eps : ', nocgiter, eps ) endif rkzki0=rkzki rkzki=0d0 do n=nogauss+1,nogauss+nocg ndn=noel(n) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo betak=rkzki/rkzki0 do n=nogauss+1,nogauss+nocg ndn=noel(n) pk(ndn)=zkr(ndn)+betak*pk(ndn) enddo goto 10 end subroutine conjugategradient subroutine conjugategradient_omp_threadsafe(s1,ndx,ipre) use m_reduce use unstruc_messages implicit none integer :: ndx double precision :: s1(ndx) integer i,ipre,j,jj,n double precision :: rkzki,rkzki0,pkapki,alfak,betak double precision :: eps integer :: ntot logical :: isnan character(len=100) :: message if (nocg<=0) return nocgiter = 0 eps=0d0 rkzki=0d0 !$xxOMP PARALLEL DO & !$xxOMP PRIVATE(n,ndn,j) & !$xxOMP REDUCTION(+:rkzki) & !$xxOMP REDUCTION(MAX:eps) do n=nogauss+1,nogauss+nocg ndn=noel(n) rk(ndn)=ddr(ndn)-bbr(ndn)*s1(ndn) do j=1,row(ndn)%l rk(ndn)=rk(ndn)-ccr(row(ndn)%a(j))*s1(row(ndn)%j(j)) enddo eps = max(eps,abs(rk(ndn)) ) ! if (abs(rk(ndn)).gt.eps) eps=abs(rk(ndn)) abs abs pk(ndn)=rk(ndn)/bbr(ndn) zkr(ndn)=pk(ndn) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo !$xxOMP END PARALLEL DO if (eps.lt.epscg) then return endif 10 CONTINUE pkapki=0d0 !$xxOMP PARALLEL DO & !$xxOMP PRIVATE(n,ndn,j) & !$xxOMP REDUCTION(+:pkapki) do n=nogauss+1,nogauss+nocg ndn=noel(n) apk(ndn)=bbr(ndn)*pk(ndn) do j=1,row(ndn)%l apk(ndn)=apk(ndn)+ccr(row(ndn)%a(j))*pk(row(ndn)%j(j)) enddo pkapki=pkapki+pk(ndn)*apk(ndn) enddo !$xxOMP END PARALLEL DO alfak=rkzki/pkapki !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) do n=nogauss+1,nogauss+nocg ndn=noel(n) s1(ndn)=s1(ndn)+alfak*pk(ndn) rk(ndn)=rk(ndn)-alfak*apk(ndn) enddo !$OMP END PARALLEL DO eps=0d0 if (ipre==0) then !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) & !$OMP REDUCTION(MAX:eps) do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn)/bbr(ndn) eps = max(eps,abs(zkr(ndn))) ! if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo !$OMP END PARALLEL DO else do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn) do j=1,row(ndn)%l ! ntot i=row(ndn)%j(j) if (indn) then jj=row(ndn)%a(j) zkr(ndn)=zkr(ndn)-ccr(jj)*zkr(i) endif enddo !if (isnan(bbr(ndn) ) ) then ! write(*,*) 'before ', ndn, bbr(ndn), zkr(ndn) !endif zkr(ndn)=zkr(ndn)/bbr(ndn) !if (isnan(zkr(ndn) ) ) then ! write(*,*) 'after ', ndn, bbr(ndn), zkr(ndn) !endif if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo endif !if (eps.lt.epscg) then ! return !endif if (eps.lt.epscg) then ! write(message,*) 'Solver converged in ', nocgiter,' iterations' ! call mess(LEVEL_INFO, message) return endif nocgiter=nocgiter+1 if (nocgiter.gt.1000) then call mess(LEVEL_ERROR, ' no convergence for CG method, nr of iterations, eps : ', nocgiter, eps ) endif rkzki0=rkzki rkzki=0d0 !$xOMP PARALLEL DO & !$xOMP PRIVATE(n,ndn) & !$xOMP REDUCTION(+:rkzki) do n=nogauss+1,nogauss+nocg ndn=noel(n) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo !$xOMP END PARALLEL DO ! todo omp onbegrijpelijk dat dit verschil geeft betak=rkzki/rkzki0 !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) do n=nogauss+1,nogauss+nocg ndn=noel(n) pk(ndn)=zkr(ndn)+betak*pk(ndn) enddo !$OMP END PARALLEL DO goto 10 end subroutine conjugategradient_omp_threadsafe subroutine conjugategradient_omp(s1,ndx,ipre) use m_reduce use unstruc_messages implicit none integer :: ndx double precision :: s1(ndx) integer i,ipre,j,jj,n double precision :: rkzki,rkzki0,pkapki,alfak,betak double precision :: eps integer :: ntot logical :: isnan if (nocg<=0) return nocgiter = 0 eps=0d0 rkzki=0d0 !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn,j) & !$OMP REDUCTION(+:rkzki) & !$OMP REDUCTION(MAX:eps) do n=nogauss+1,nogauss+nocg ndn=noel(n) rk(ndn)=ddr(ndn)-bbr(ndn)*s1(ndn) do j=1,row(ndn)%l rk(ndn)=rk(ndn)-ccr(row(ndn)%a(j))*s1(row(ndn)%j(j)) enddo eps = max(eps,abs(rk(ndn)) ) ! if (abs(rk(ndn)).gt.eps) eps=abs(rk(ndn)) abs abs pk(ndn)=rk(ndn)/bbr(ndn) zkr(ndn)=pk(ndn) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo !$OMP END PARALLEL DO if (eps.lt.epscg) then return endif 10 CONTINUE pkapki=0d0 !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn,j) & !$OMP REDUCTION(+:pkapki) do n=nogauss+1,nogauss+nocg ndn=noel(n) apk(ndn)=bbr(ndn)*pk(ndn) do j=1,row(ndn)%l apk(ndn)=apk(ndn)+ccr(row(ndn)%a(j))*pk(row(ndn)%j(j)) enddo pkapki=pkapki+pk(ndn)*apk(ndn) enddo !$OMP END PARALLEL DO alfak=rkzki/pkapki !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) do n=nogauss+1,nogauss+nocg ndn=noel(n) s1(ndn)=s1(ndn)+alfak*pk(ndn) rk(ndn)=rk(ndn)-alfak*apk(ndn) enddo !$OMP END PARALLEL DO eps=0d0 if (ipre==0) then !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) & !$OMP REDUCTION(MAX:eps) do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn)/bbr(ndn) eps = max(eps,abs(zkr(ndn))) ! if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo !$OMP END PARALLEL DO else do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn) do j=1,row(ndn)%l ! ntot i=row(ndn)%j(j) if (indn) then jj=row(ndn)%a(j) zkr(ndn)=zkr(ndn)-ccr(jj)*zkr(i) endif enddo !if (isnan(bbr(ndn) ) ) then ! write(*,*) 'before ', ndn, bbr(ndn), zkr(ndn) !endif zkr(ndn)=zkr(ndn)/bbr(ndn) !if (isnan(zkr(ndn) ) ) then ! write(*,*) 'after ', ndn, bbr(ndn), zkr(ndn) !endif if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo endif if (eps.lt.epscg) then return endif nocgiter=nocgiter+1 if (nocgiter.gt.1000) then call mess(LEVEL_ERROR, ' no convergence for CG method, nr of iterations, eps : ', nocgiter, eps ) endif rkzki0=rkzki rkzki=0d0 !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) & !$OMP REDUCTION(+:rkzki) do n=nogauss+1,nogauss+nocg ndn=noel(n) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo !$OMP END PARALLEL DO ! todo omp onbegrijpelijk dat dit verschil geeft betak=rkzki/rkzki0 !$OMP PARALLEL DO & !$OMP PRIVATE(n,ndn) do n=nogauss+1,nogauss+nocg ndn=noel(n) pk(ndn)=zkr(ndn)+betak*pk(ndn) enddo !$OMP END PARALLEL DO goto 10 end subroutine conjugategradient_omp subroutine gauss_substitution(s1,ndx) use m_reduce implicit none integer :: ndx double precision :: s1(ndx) integer m,n,np do n=nogauss,1,-1 ndn=noel(n) np=row(ndn)%l d0(ndn)=ddr(ndn) do m=1,np d0(ndn)=d0(ndn)-ccr(row(ndn)%a(m))*s1(row(ndn)%j(m)) enddo s1(ndn)=d0(ndn) enddo return end subroutine gauss_substitution subroutine gauss_elimination use m_reduce implicit none !double precision :: ccc(500) integer j,k,m,n,np,m1,nodm1,m1m2,nn ccc=0d0 do n=1,nogauss ndn=noel(n) nn=ndn np=row(ndn)%l ddr(ndn)=ddr(ndn)/bbr(ndn) do m=1,np ccc(m)=ccr(row(ndn)%a(m))/bbr(ndn) enddo k=0 do m=1,np m1=row(ndn)%j(m) nodm1=row(ndn)%a(m) bbr(m1)=bbr(m1)-ccc(m)*ccr(nodm1) ddr(m1)=ddr(m1)-ddr(ndn)*ccr(nodm1) do j=m+1,np k=k+1 m1m2=row(ndn)%f(k) ccr(m1m2)=ccr(m1m2)-ccr(nodm1)*ccc(j) enddo ccr(nodm1)=ccc(m) enddo enddo return end subroutine gauss_elimination subroutine pack_matrix() use m_reduce use m_flowgeom use m_flow use m_partitioninfo IMPLICIT NONE integer nn,m,l,m1,m2 call setkfs() if ( jampi.eq.1 ) then call partition_setkfs ! exclude all water-level ghostnodes from solution vector endif nowet=0 nocg=0 nogauss=0 noexpl=0 do nn=1,nogauss0 ndn=noel0(nn) if (kfs(ndn)==1) then ! .or.abs(qi(ndn))>1d-12) then nowet=nowet+1 noel(nowet)=ndn nogauss=nogauss+1 endif enddo do nn=nogauss0+1,nogauss0+nocg0 ndn=noel0(nn) if (kfs(ndn)==1) then ! .or.abs(qi(ndn))>1d-12) then nowet=nowet+1 noel(nowet)=ndn nocg=nocg+1 endif enddo if (ivariableteta==2) then do nn=1,nogauss0+nocg0 ndn=noel0(nn) if ( kfs(ndn)==2 ) then nowet=nowet+1 noel(nowet)=ndn noexpl=noexpl+1 endif enddo endif return end subroutine reducept(Ndx, Ndxi, Lnx) ! this subroutine finds an elimination order for Gaussian elimination based upon minimum degree algorithm use m_reduce use unstruc_messages use m_flowparameters, only : icgsolver, ipre use m_partitioninfo implicit none integer :: Ndx, Ndxi, Lnx integer :: nn ! integers used for counting integer :: minold ! minimum degree values integer :: ierror ! ARS 11272, check on the maximum number of nodes and do not allow maxdge larger than that nodtot = ndx lintot = lnx if (nodtot .le. maxdge) maxdge = nodtot ! call readyy('ini Gauss/CG', 0d0) call inireduce call readyy('ini Gauss/CG', .35d0) call mindegree call readyy('ini Gauss/CG', .40d0) nogauss=0 nocg=0 nn=0 do if (mindgr>maxdge) exit nn=nn+1 ndn=nn if ( jagauss(nn).eq.1 ) then if (nbrstk(ndn)/=0.and.nodbr2(ndn)==mindgr) then minold=mindgr call pointonstack if (minold>mindgr) nn=0 endif else ! write (*,*) endif if (nn==nodtot) then ! if (nn==noactive) then call mindegree nn=0 endif enddo call readyy('ini Gauss/CG', .75d0) call cijadres call readyy('ini Gauss/CG', .80d0) call inigauss call readyy('ini Gauss/CG', .90d0) if (nogauss 0 ) then ! if (l>0) then ! s1(nn) = s1(nn) + ff*(1-teta(il))*ru(il)*au(il) ! or q(il) when called after subroutine u1q1 ! else ! s1(nn) = s1(nn) - ff*(1-teta(il))*ru(il)*au(il) ! endif ! endif ! enddo else endif enddo end subroutine explicit subroutine conjugategradient_MPI(s1,ndx,ipre,nocgiter_loc,ierror) use m_reduce ! use unstruc_messages ! BEGIN MPI #ifdef HAVE_MPI use mpi #endif use m_partitioninfo ! END MPI use m_timer use MessageHandling implicit none integer :: ndx, ipre double precision :: s1(ndx) integer, intent(out) :: nocgiter_loc integer, intent(out) :: ierror !< error (1) or not (0) character(len=100) :: message #ifdef HAVE_MPI integer :: irank, nopreconditioner integer :: i,j,jj,n,ntot, mout, japrint = 0 double precision :: rkzki,rkzki0,pkapki,alfak,betak double precision :: eps ! BEGIN MPI double precision :: eps_tmp, rkzki_tmp, pkapki_tmp ! END MPI ierror = 0 #endif nocgiter_loc = 0 ! ddr (rechterlid), bbr (diag) , ccr (off diag), s1, row, row()%j, row()%a [AvD] #ifdef HAVE_MPI eps=0d0 rkzki=0d0 if (nocg > 0) then if ( ipre.eq.2 ) then write(6,*) "ipre=2 not supported" stop else if ( ipre.eq.3 .or. ipre.eq.4 ) then ! Saad preconditioner ! compute Saad matrix, preconditioner and permutation only call conjugategradientSAAD(ddr,s1,ndx,nopreconditioner,-1,1,ierror) ! ddr and s1 not used if ( ierror.ne.0 ) goto 1234 end if else rk = 0d0 endif ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) !call update_ghost(s1, ierror) call update_ghosts(ITYPE_S, 1, nodtot, s1, ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 ! END MPI do n=nogauss+1,nogauss+nocg ndn=noel(n) rk(ndn)=ddr(ndn)-bbr(ndn)*s1(ndn) ntot=row(ndn)%l do j=1,ntot i=row(ndn)%j(j) jj=row(ndn)%a(j) rk(ndn)=rk(ndn)-ccr(jj)*s1(i) enddo if (abs(rk(ndn)).gt.eps) eps=abs(rk(ndn)) ! pk(ndn)=rk(ndn)/bbr(ndn) ! zkr(ndn)=pk(ndn) ! rkzki=rkzki+rk(ndn)*zkr(ndn) enddo ! preconditioning if ( ipre.eq.3 .or. ipre.eq.4 ) then ! Saad preconditioner ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) !call update_ghost(rk,ierror) call update_ghosts(ITYPE_S, 1, nodtot, rk, ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 ! END MPI if (nocg > 0) then if ( ipre.eq.3 ) then call conjugategradientSAAD(rk,zkr,ndx,nopreconditioner,0,1,ierror) ! do not (re)compute preconditioner and permutation if ( ierror.ne.0 ) goto 1234 else if ( ipre.eq.4 ) then call conjugategradientSAAD(rk,zkr,ndx,nopreconditioner,2,1,ierror) ! do not (re)compute preconditioner and permutation if ( ierror.ne.0 ) goto 1234 end if else zkr = 0d0 endif else do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn)/bbr(ndn) end do end if do n=nogauss+1,nogauss+nocg ndn=noel(n) pk(ndn) = zkr(ndn) rkzki = rkzki+rk(ndn)*zkr(ndn) end do ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call mpi_allreduce(rkzki,rkzki_tmp,1,mpi_double_precision,mpi_sum,DFM_COMM_DFMWORLD,ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 rkzki=rkzki_tmp if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call mpi_allreduce(eps,eps_tmp,1,mpi_double_precision,mpi_max,DFM_COMM_DFMWORLD,ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if (ierror.ne.0 ) goto 1234 eps = eps_tmp ! END MPI if (eps.lt.epscg ) then if ( my_rank.eq.0 ) then write(message,*) 'Solver converged in ', nocgiter_loc,' iterations' call mess(LEVEL_INFO, message) end if return endif 10 CONTINUE ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) !call update_ghost(pk(1:nodtot),ierror) call update_ghosts(ITYPE_S, 1, nodtot, pk(1:nodtot), ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 ! END MPI ! compute A pk and pk' A Pk pkapki=0d0 do n=nogauss+1,nogauss+nocg ndn=noel(n) apk(ndn)=bbr(ndn)*pk(ndn) ntot=row(ndn)%l do j=1,ntot i=row(ndn)%j(j) jj=row(ndn)%a(j) apk(ndn)=apk(ndn)+ccr(jj)*pk(i) enddo pkapki=pkapki+pk(ndn)*apk(ndn) enddo ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call mpi_allreduce(pkapki,pkapki_tmp,1,mpi_double_precision,mpi_sum,DFM_COMM_DFMWORLD,ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 pkapki=pkapki_tmp ! END MPI alfak=rkzki/pkapki eps=0d0 do n=nogauss+1,nogauss+nocg ndn=noel(n) s1(ndn)=s1(ndn)+alfak*pk(ndn) rk(ndn)=rk(ndn)-alfak*apk(ndn) if (abs(rk(ndn)).gt.eps) eps=abs(rk(ndn)) enddo ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call mpi_allreduce(eps,eps_tmp,1,mpi_double_precision,mpi_max,DFM_COMM_DFMWORLD,ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if (ierror.ne.0 ) goto 1234 eps = eps_tmp ! END MPI if (ipre==0) then ! row scaling do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn)/bbr(ndn) ! if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo else if ( ipre.eq.1 .or. ipre.eq.2 ) then ! GS, incomplete Choleski ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) !call update_ghost(rk,ierror) call update_ghosts(ITYPE_S, 1, nodtot, rk, ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 zkr = rk ! END MPI do n=nogauss+1,nogauss+nocg ndn=noel(n) zkr(ndn)=rk(ndn) ntot=row(ndn)%l do j=1,ntot i=row(ndn)%j(j) if (indn) then jj=row(ndn)%a(j) zkr(ndn)=zkr(ndn)-ccr(jj)*zkr(i) endif enddo if (ipre == 1) then zkr(ndn)=zkr(ndn)/bbr(ndn) ! Gs else zkr(ndn)=zkr(ndn)/bbl(ndn) ! Chol endif ! if (abs(zkr(ndn)).gt.eps) eps=abs(zkr(ndn)) enddo else if ( ipre.eq.3 .or. ipre.eq.4 ) then ! Saad preconditioning ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) !call update_ghost(rk,ierror) call update_ghosts(ITYPE_S, 1, nodtot, rk, ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 ! END MPI if (nocg>0) then if ( ipre.eq.3 ) then call conjugategradientSAAD(rk,zkr,ndx,nopreconditioner,0,1,ierror) ! do not (re)compute preconditioner and permutation if ( ierror.ne.0 ) goto 1234 else if ( ipre.eq.4 ) then call conjugategradientSAAD(rk,zkr,ndx,nopreconditioner,2,1,ierror) ! do not (re)compute preconditioner and permutation if ( ierror.ne.0 ) goto 1234 end if else zkr = 0d0 end if endif if (eps.lt.epscg) then if ( my_rank.eq.0 ) then write(message,*) 'Solver converged in ', nocgiter_loc,' iterations' call mess(LEVEL_INFO, message) end if return endif nocgiter_loc=nocgiter_loc+1 ! MPI ! if ( my_rank.eq.0 .and. 100*(nocgiter_loc/10).eq.nocgiter_loc ) write(6,*) nocgiter_loc, eps ! if ( my_rank.eq.0 ) write(6,*) nocgiter_loc, eps, alfak ! END MPI if (nocgiter_loc.gt.1000) then call mess(LEVEL_ERROR, ' no convergence for CG method, nr of iterations, eps : ', nocgiter_loc, eps ) endif rkzki0=rkzki rkzki=0d0 do n=nogauss+1,nogauss+nocg ndn=noel(n) rkzki=rkzki+rk(ndn)*zkr(ndn) enddo ! BEGIN MPI if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call mpi_allreduce(rkzki,rkzki_tmp,1,mpi_double_precision,mpi_sum,DFM_COMM_DFMWORLD,ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) if ( ierror.ne.0 ) goto 1234 rkzki=rkzki_tmp ! END MPI betak=rkzki/rkzki0 do n=nogauss+1,nogauss+nocg ndn=noel(n) pk(ndn)=zkr(ndn)+betak*pk(ndn) enddo goto 10 #endif ierror = 0 ! error handling 1234 continue if ( ierror.ne.0 ) call mess(LEVEL_ERROR, 'conjugategradient_MPI error') end subroutine conjugategradient_MPI subroutine writematrix_matlab() use M_reduce implicit none integer :: mout, n, i, jj, j, ntot CALL NEWFIL(MOUT, 'MATRIX.m') write(mout,*) 'N = ', maxval(noel(nogauss+1:nogauss+nocg) ) write(mout,'(A)') 'bbl=[' do n=nogauss+1,nogauss+nocg ndn=noel(n) ntot=row(ndn)%l write(mout,*) ndn, ndn, bbl(ndn) do j=1,ntot i=row(ndn)%j(j) jj=row(ndn)%a(j) if (i 0) then rhs(nn) = ddr(ndn) ntot=row(ndn)%l do j=1,ntot i = row(ndn)%j(j) jj = row(ndn)%a(j) if (ccr(jj) .ne. 0d0) then if ( kfs(i).eq.1 ) then else ! internal boundary: add to right-hand side rhs(nn) = rhs(nn) - ccr(jj)*s1(i) end if endif enddo end if enddo nn = nocg ! number of rows na = iao(nn+1)-1 ! total number of non-zeros ! solve system, ! do not reinitialize Saad solver and do not compute preconditioner again call cgsaad(its,na,nn,0,0,ierror,res) ! copy solution vector to s1 nn = 0 do n = nogauss+1, nogauss+nocg ndn = noel(n) ! guus index if (ndn > 0) then nn = nn + 1 s1(ndn) = sol(nn) ! en hier zet men thee en over else ndn = -ndn noel(n) = ndn s1(ndn) = ddr(ndn) / bbr(ndn) endif enddo ! store s1 in ghost cells before update do i=1,nghostlist_s(ndomains-1) s1_ghost(i) = s1(ighostlist_s(i)) end do ! update ghost cells if ( jatimer.eq.1 ) call starttimer(IMPICOMM) call update_ghosts(ITYPE_S, 1, Ndx, s1, ierror) if ( jatimer.eq.1 ) call stoptimer(IMPICOMM) maxdiff = 0d0 ! compute maximum difference do i=1,nghostlist_s(ndomains-1) maxdiff = max(maxdiff, abs(s1_ghost(i)-s1(ighostlist_s(i)))) end do !! compute maximum redisual ! maxresloc = 0d0 ! do n=nogauss+1,nogauss+nocg ! ndn = noel(n) ! guus ! if (ndn > 0) then ! resloc = ddr(ndn) - bbr(ndn)*s1(ndn) ! ! ntot=row(ndn)%l ! do j=1,ntot ! i = row(ndn)%j(j) ! jj = row(ndn)%a(j) ! resloc = resloc - ccr(jj)*s1(i) ! enddo ! ! maxresloc = max(maxresloc,abs(resloc)) ! end if ! end do ! ! call reduce_double3_max(maxdiff,res,maxresloc) dum = dble(its) call reduce_double3_max(maxdiff,res,dum) its = int(dum) if ( my_rank.eq.0 .and. javerbose.eq.1 ) then write(6,*) 'iter=', iter, 'maxdiff=', maxdiff, 'its=', its, 'res=', res write(iout,"(I8, E15.5, I8, E15.5, E15.5)") iter, maxdiff, its, res end if nocgiter = nocgiter+its if ( maxdiff.lt.epsdiff .and. res.lt.epscg ) then exit end if if ( iter.eq.1 ) then fpar(2) = max(epscg,0.1d0*maxdiff) else fpar(2) = min(fpar(2),max(epscg,0.1d0*maxdiff)) end if ipar(6) = maxmatvecs end do if ( my_rank.eq.0 ) then write(message,"('Solver converged in ', I0, ' (', I0, ') iterations')" ) nocgiter, iter call mess(LEVEL_INFO, message) end if return end subroutine testsolver