!||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| module POP_DistributionMod !BOP ! !MODULE: POP_DistributionMod ! ! !DESCRIPTION: ! This module provides data types and routines for distributing ! blocks across processors. ! ! !REVISION HISTORY: ! SVN:$Id: POP_DistributionMod.F90 67 2007-03-21 14:39:24Z pwjones $ ! 2006-09-06: Phil Jones ! new naming conventions, more general functionality ! and some new query capabilities ! !USES: use POP_KindsMod use POP_ErrorMod use POP_CommMod use POP_BlocksMod use POP_SpaceCurveMod implicit none private save ! !PUBLIC TYPES: type, public :: POP_distrb ! distribution data type private integer (POP_i4) :: & numProcs ,&! number of processors in this dist communicator ,&! communicator to use in this dist numLocalBlocks ! number of blocks distributed to this ! local processor integer (POP_i4), dimension(:), pointer :: & blockLocation ,&! processor location for all blocks blockLocalID ,&! local block id for all blocks blockGlobalID ! global block id for each local block end type ! !PUBLIC MEMBER FUNCTIONS: public :: POP_DistributionCreate, & POP_DistributionDestroy, & POP_DistributionGet, & POP_DistributionGetBlockLoc, & POP_DistributionGetPointLoc, & POP_DistributionGetBlockID ! !DEFINED PARAMETERS: ! supported methods for block distribution integer (POP_i4), parameter, public :: & POP_distribMethodNone = 0, & POP_distribMethodCartesian = 1, & POP_distribMethodRake = 2, & POP_distribMethodSpacecurve= 3 !EOP !BOC !EOC !*********************************************************************** contains !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionCreate ! !INTERFACE: function POP_DistributionCreate(distrbMethod, numProcs, workPerBlock, & errorCode) result(newDistrb) ! !DESCRIPTION: ! This routine creates a new data structure that describes the ! distribution of blocks across processors. Based on the input ! choice of method for distribution, an appropriate creation ! routine is called. Currently only two distributions are supported: ! 2-d Cartesian distribution (cartesian) and a load-balanced ! distribution using a rake algorithm based on the amount of ! work per block. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (POP_i4), intent(in) :: & numProcs, &! number of processors in this distribution distrbMethod ! method for distributing blocks integer (POP_i4), dimension(:), intent(in) :: & workPerBlock ! estimated amount of work per block ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code type (POP_distrb) :: & newDistrb ! resulting structure describing ! distribution of blocks !EOP !BOC integer (POP_i4) :: maxWork integer (POP_i4),allocatable :: work_per_block(:) !---------------------------------------------------------------------- ! ! select the appropriate distribution type ! !---------------------------------------------------------------------- errorCode = POP_Success select case (distrbMethod) case(POP_distribMethodCartesian) !------------------------------------------------ ! The following comments and code were contributed ! by John Dennis, CISL ! ! these particular partitioning algorithms do not ! handle land block elimination anyway ! KLUDGE: probably should do something better here !------------------------------------------------ allocate(work_per_block(size(workPerBlock))) maxWork = MAXVAL(workPerBlock) work_per_block = maxWork newDistrb = POP_DistributionCreateCartesian(numProcs, & work_per_block, errorCode) deallocate(work_per_block) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreate: error in cartesian create') return endif case(POP_distribMethodRake) !------------------------------------------------ ! The following comments and code were contributed ! by John Dennis, CISL ! ! these particular partitioning algorithms do not ! handle land block elimination anyway ! KLUDGE: probably should do something better here !------------------------------------------------ allocate(work_per_block(size(workPerBlock))) maxWork = MAXVAL(workPerBlock) work_per_block = maxWork newDistrb = POP_DistributionCreateRake(numProcs, & work_per_block, errorCode) deallocate(work_per_block) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreate: error in rake create') return endif case(POP_distribMethodSpacecurve) newDistrb = POP_DistributionCreateSpacecurv(numProcs, & workPerBlock, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreate: error in Spacecurve create') return endif case default call POP_ErrorSet(errorCode, & 'POP_DistributionCreate: unknown distribution method') return end select !----------------------------------------------------------------------- !EOC end function POP_DistributionCreate !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionDestroy ! !INTERFACE: subroutine POP_DistributionDestroy(distribution, errorCode) ! !DESCRIPTION: ! This routine destroys a defined distribution by deallocating ! all memory associated with the distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT/OUTPUT PARAMETERS: type (POP_distrb), intent(inout) :: & distribution ! distribution to destroy ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: istat ! status flag for deallocate !---------------------------------------------------------------------- ! ! reset scalars ! !---------------------------------------------------------------------- errorCode = POP_Success distribution%numProcs = 0 distribution%communicator = 0 distribution%numLocalBlocks = 0 !---------------------------------------------------------------------- ! ! deallocate arrays ! !---------------------------------------------------------------------- if ( associated(distribution%blockLocation) ) then deallocate(distribution%blockLocation, stat=istat) if (istat > 0 ) then call POP_ErrorSet(errorCode, & 'POP_DistributionDestroy: error deallocating blockLocation') return endif endif if ( associated(distribution%blockLocalID) ) then deallocate(distribution%blockLocalID , stat=istat) if (istat > 0 ) then call POP_ErrorSet(errorCode, & 'POP_DistributionDestroy: error deallocating blockLocalID') return endif endif if ( associated(distribution%blockGlobalID) ) then deallocate(distribution%blockGlobalID, stat=istat) if (istat > 0 ) then call POP_ErrorSet(errorCode, & 'POP_DistributionDestroy: error deallocating blockGlobalID') return endif endif !----------------------------------------------------------------------- !EOC end subroutine POP_DistributionDestroy !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionGet ! !INTERFACE: subroutine POP_DistributionGet(distribution, errorCode, & numProcs, communicator, numLocalBlocks, & blockLocation, blockLocalID, blockGlobalID) ! !DESCRIPTION: ! This routine extracts information from a distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (POP_distrb), intent(in) :: & distribution ! input distribution for which information ! is requested ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code integer (POP_i4), intent(out), optional :: & numProcs ,&! number of processors in this dist communicator ,&! communicator to use in this dist numLocalBlocks ! number of blocks distributed to this ! local processor integer (POP_i4), dimension(:), pointer, optional :: & blockLocation ,&! processor location for all blocks blockLocalID ,&! local block id for all blocks blockGlobalID ! global block id for each local block !EOP !BOC !----------------------------------------------------------------------- ! ! depending on which optional arguments are present, extract the ! requested info ! !----------------------------------------------------------------------- errorCode = POP_Success if (present(numProcs)) numProcs = distribution%numProcs if (present(communicator)) communicator = distribution%communicator if (present(numLocalBlocks)) numLocalBlocks = distribution%numLocalBlocks if (present(blockLocation)) then if (associated(distribution%blockLocation)) then blockLocation => distribution%blockLocation else call POP_ErrorSet(errorCode, & 'POP_DistributionGet: blockLocation not allocated') return endif endif if (present(blockLocalID)) then if (associated(distribution%blockLocalID)) then blockLocalID = distribution%blockLocalID else call POP_ErrorSet(errorCode, & 'POP_DistributionGet: blockLocalID not allocated') return endif endif if (present(blockGlobalID)) then if (associated(distribution%blockGlobalID)) then blockGlobalID = distribution%blockGlobalID else call POP_ErrorSet(errorCode, & 'POP_DistributionGet: blockGlobalID not allocated') return endif endif !----------------------------------------------------------------------- !EOC end subroutine POP_DistributionGet !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionGetBlockLoc ! !INTERFACE: subroutine POP_DistributionGetBlockLoc(distribution, blockID, & processor, localID, errorCode) ! !DESCRIPTION: ! Given a distribution of blocks and a global block ID, return ! the processor and local index for the block. A zero for both ! is returned in the case that the block has been eliminated from ! the distribution (i.e. has no active points). ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (POP_distrb), intent(in) :: & distribution ! input distribution for which information ! is requested integer (POP_i4), intent(in) :: & blockID ! global block id for which location requested ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code integer (POP_i4), intent(out) :: & processor, &! processor on which block resides localID ! local index for this block on this proc !EOP !BOC !----------------------------------------------------------------------- ! ! check for valid blockID ! !----------------------------------------------------------------------- errorCode = POP_Success if (blockID < 0 .or. blockID > POP_numBlocks) then call POP_ErrorSet(errorCode, & 'POP_DistributionGetBlockLoc: invalid block id') return endif !----------------------------------------------------------------------- ! ! extract the location from the distribution data structure ! !----------------------------------------------------------------------- processor = distribution%blockLocation(blockID) localID = distribution%blockLocalID (blockID) !----------------------------------------------------------------------- !EOC end subroutine POP_DistributionGetBlockLoc !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionGetPointLoc ! !INTERFACE: subroutine POP_DistributionGetPointLoc(distribution, iGlobal, jGlobal,& processor, iLocal, jLocal, & localBlock, errorCode) ! !DESCRIPTION: ! Given a distribution of blocks and a global point address (i,j), ! return the processor and local block index for the block containing ! this point. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (POP_distrb), intent(in) :: & distribution ! input distribution for which information ! is requested integer (POP_i4), intent(in) :: & iGlobal, jGlobal ! global (i,j) for which location requested ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code integer (POP_i4), intent(out) :: & processor, &! processor on which point resides iLocal, jLocal, &! local i,j indices for this point localBlock ! local block index for this point !EOP !BOC !----------------------------------------------------------------------- ! ! local variables ! !----------------------------------------------------------------------- integer (POP_i4) :: & i,j, &! horizontal indices iblock, &! block loop index blockID ! block id for found block type (POP_block) :: & thisBlock !----------------------------------------------------------------------- ! ! first find the block containing this point ! !----------------------------------------------------------------------- errorCode = POP_Success blockID = 0 iLocal = 0 jLocal = 0 blockSearch: do iblock = 1,POP_numBlocks thisBlock = POP_BlocksGetBlock(iblock, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionGetPointLoc: error retrieving block') return endif if (jGlobal >= thisBlock%jGlobal(thisBlock%jb) .and. & jGlobal <= thisBlock%jGlobal(thisBlock%je)) then ! block has j if (iGlobal >= thisBlock%iGlobal(thisBlock%ib) .and. & iGlobal <= thisBlock%iGlobal(thisBlock%ie)) then ! block has i blockID = iblock jLoop: do j=thisBlock%jb, thisBlock%je if (jGlobal == thisBlock%jGlobal(j)) then ! found point jLocal = j exit jLoop endif end do jLoop iLoop: do i=thisBlock%ib, thisBlock%ie if (iGlobal == thisBlock%iGlobal(i)) then ! found point iLocal = i exit iLoop endif end do iLoop exit blockSearch endif endif end do blockSearch if (blockID == 0 .or. iLocal == 0 .or. jLocal == 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionGetPointLoc: could not find point') return endif !----------------------------------------------------------------------- ! ! extract the location from the distribution data structure ! !----------------------------------------------------------------------- processor = distribution%blockLocation(blockID) localBlock = distribution%blockLocalID (blockID) !----------------------------------------------------------------------- !EOC end subroutine POP_DistributionGetPointLoc !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionGetBlockID ! !INTERFACE: subroutine POP_DistributionGetBlockID(distribution, localID, & blockID, errorCode) ! !DESCRIPTION: ! Given a distribution of blocks and a local block index, return ! the global block id for the block. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (POP_distrb), intent(in) :: & distribution ! input distribution for which information ! is requested integer (POP_i4), intent(in) :: & localID ! local index for this block on this proc ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & blockID ! global block id for this local block integer (POP_i4), intent(out) :: & errorCode ! returned error code !EOP !BOC !----------------------------------------------------------------------- ! ! check for valid localID ! !----------------------------------------------------------------------- errorCode = POP_Success if (localID < 0 .or. localID > distribution%numLocalBlocks) then call POP_ErrorSet(errorCode, & 'POP_DistributionGetBlockID: invalid local id') return endif !----------------------------------------------------------------------- ! ! extract the global ID from the distribution data structure ! !----------------------------------------------------------------------- blockID = distribution%blockGlobalID (localID) !----------------------------------------------------------------------- !EOC end subroutine POP_DistributionGetBlockID !*********************************************************************** !BOP ! !IROUTINE: POP_DistributionCreateCartesian ! !INTERFACE: function POP_DistributionCreateCartesian(numProcs, workPerBlock, & errorCode) result(newDistrb) ! !DESCRIPTION: ! This function creates a distribution of blocks across processors ! using a 2-d Cartesian distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (POP_i4), intent(in) :: & numProcs ! number of processors in this distribution integer (POP_i4), dimension(:), intent(in) :: & workPerBlock ! amount of work per block ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code type (POP_distrb) :: & newDistrb ! resulting structure describing Cartesian ! distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: & i, j, &! dummy loop indices istat, &! status flag for allocation iblock, jblock, &! is, ie, js, je, &! start, end block indices for each proc processor, &! processor position in cartesian decomp globalID, &! global block ID localID, &! block location on this processor numProcsX, &! num of procs in x for global domain numProcsY, &! num of procs in y for global domain numBlocksXPerProc, &! num of blocks per processor in x numBlocksYPerProc ! num of blocks per processor in y !---------------------------------------------------------------------- ! ! create communicator for this distribution ! !---------------------------------------------------------------------- errorCode = POP_Success call POP_CommCreateCommunicator(newDistrb%communicator, numProcs) !---------------------------------------------------------------------- ! ! try to find best processor arrangement ! !---------------------------------------------------------------------- newDistrb%numProcs = numProcs call POP_DistributionProcDecomp(numProcs, & numProcsX, numProcsY, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateCartesian: error in proc decomposition') return endif !---------------------------------------------------------------------- ! ! allocate space for decomposition ! !---------------------------------------------------------------------- allocate (newDistrb%blockLocation(POP_numBlocks), & newDistrb%blockLocalID (POP_numBlocks), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateCartesian: error allocating blockLoc') return endif !---------------------------------------------------------------------- ! ! distribute blocks linearly across processors in each direction ! !---------------------------------------------------------------------- numBlocksXPerProc = (POP_numBlocksX-1)/numProcsX + 1 numBlocksYPerProc = (POP_numBlocksY-1)/numProcsY + 1 do j=1,numProcsY do i=1,numProcsX processor = (j-1)*numProcsX + i ! number the processors ! left to right, bot to top is = (i-1)*numBlocksXPerProc + 1 ! starting block in i ie = i *numBlocksXPerProc ! ending block in i if (ie > POP_numBlocksX) ie = POP_numBlocksX js = (j-1)*numBlocksYPerProc + 1 ! starting block in j je = j *numBlocksYPerProc ! ending block in j if (je > POP_numBlocksY) je = POP_numBlocksY localID = 0 ! initialize counter for local index do jblock = js,je do iblock = is,ie globalID = (jblock - 1)*POP_numBlocksX + iblock if (workPerBlock(globalID) /= 0) then localID = localID + 1 newDistrb%blockLocation(globalID) = processor newDistrb%blockLocalID (globalID) = localID else ! no work - eliminate block from distribution newDistrb%blockLocation(globalID) = 0 newDistrb%blockLocalID (globalID) = 0 endif end do end do ! if this is the local processor, set number of local blocks if (POP_myTask == processor - 1) then newDistrb%numLocalBlocks = localID endif end do end do !---------------------------------------------------------------------- ! ! now store the local info ! !---------------------------------------------------------------------- if (newDistrb%numLocalBlocks > 0) then allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), & stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateCartesian: error allocating blockLoc') return endif do j=1,numProcsY do i=1,numProcsX processor = (j-1)*numProcsX + i if (processor == POP_myTask + 1) then is = (i-1)*numBlocksXPerProc + 1 ! starting block in i ie = i *numBlocksXPerProc ! ending block in i if (ie > POP_numBlocksX) ie = POP_numBlocksX js = (j-1)*numBlocksYPerProc + 1 ! starting block in j je = j *numBlocksYPerProc ! ending block in j if (je > POP_numBlocksY) je = POP_numBlocksY localID = 0 ! initialize counter for local index do jblock = js,je do iblock = is,ie globalID = (jblock - 1)*POP_numBlocksX + iblock if (workPerBlock(globalID) /= 0) then localID = localID + 1 newDistrb%blockGlobalID (localID) = globalID endif end do end do endif end do end do endif !---------------------------------------------------------------------- !EOC end function POP_DistributionCreateCartesian !********************************************************************** !BOP ! !IROUTINE: POP_DistributionCreateSpacecurv ! !INTERFACE: function POP_DistributionCreateSpacecurv(numProcs,workPerBlock,errorCode) result(newDistrb) ! !Description: ! This function distributes blocks across processors in a ! load-balanced manner using space-filling curves ! ! !REVISION HISTORY: ! added by J. Dennis 3/10/06 ! !INPUT PARAMETERS: integer (POP_i4), intent(in) :: & numProcs ! number of processors in this distribution integer (POP_i4), dimension(:), intent(in) :: & workPerBlock ! amount of work per block ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code type (POP_distrb) :: & newDistrb ! resulting structure describing ! load-balanced distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: & i,j,k,n ,&! dummy loop indices pid ,&! dummy for processor id local_block ,&! local block position on processor max_work ,&! max amount of work in any block numProcsX ,&! num of procs in x for global domain numProcsY ! num of procs in y for global domain integer (POP_i4), dimension(:),allocatable :: & idxT_i,idxT_j integer (POP_i4), dimension(:,:),allocatable :: Mesh, Mesh2, Mesh3 integer (POP_i4) :: nblocksL,nblocks,ii,extra,i2,j2,tmp1,s1,ig integer (POP_i4) :: ierr logical, parameter :: Debug = .FALSE. integer (POP_i4), dimension(:), allocatable :: & priority ,&! priority for moving blocks work_tmp ,&! work per row or column for rake algrthm proc_tmp ,&! temp processor id for rake algrthm block_count ! counter to determine local block indx type (factor_t) :: xdim,ydim integer (POP_i4) :: it,jj integer (POP_i4) :: curveSize,sb_x,sb_y,itmp,numfac integer (POP_i4) :: subNum, sfcNum logical :: foundX !---------------------------------------------------------------------- ! ! first set up as Cartesian distribution ! retain the Cartesian distribution if POP_numBlocks = numProcs ! to avoid processors with no work ! !---------------------------------------------------------------------- errorCode = POP_Success !------------------------------------------------------ ! Space filling curves only work if: ! ! POP_numBlocksX = 2^m1 3^n1 5^o1 where m1,n1,o1 are integers ! POP_numBlocksY = 2^m2 3^n2 5^o2 where m2,n2,o2 are integers !------------------------------------------------------ if((.not. IsFactorable(POP_numBlocksY)) .or. (.not. IsFactorable(POP_numBlocksX))) then newDistrb = POP_DistributionCreateCartesian(numProcs, workPerBlock,errorCode) return endif !----------------------------------------------- ! Factor the numbers of blocks in each dimension !----------------------------------------------- xdim = Factor(POP_numBlocksX) ydim = Factor(POP_numBlocksY) numfac = xdim%numfact !--------------------------------------------- ! Match the common factors to create SFC curve !--------------------------------------------- curveSize=1 do it=1,numfac call MatchFactor(xdim,ydim,itmp,foundX) curveSize = itmp*curveSize enddo !-------------------------------------- ! determine the size of the sub-blocks ! within the space-filling curve !-------------------------------------- sb_x = ProdFactor(xdim) sb_y = ProdFactor(ydim) call POP_CommCreateCommunicator(newDistrb%communicator, numProcs) newDistrb%numProcs = numProcs !---------------------------------------------------------------------- ! ! allocate space for decomposition ! !---------------------------------------------------------------------- allocate (newDistrb%blockLocation(POP_numBlocks), & newDistrb%blockLocalID(POP_numBlocks)) newDistrb%blockLocation = 0 newDistrb%blockLocalID = 0 !---------------------------------------------------------------------- ! Create the array to hold the SFC !---------------------------------------------------------------------- allocate(Mesh(curveSize,curveSize)) allocate(Mesh2(POP_numBlocksX,POP_numBlocksY),Mesh3(POP_numBlocksX,POP_numBlocksY) ) Mesh = 0 Mesh2 = 0 Mesh3 = 0 allocate(idxT_i(POP_numBlocks),idxT_j(POP_numBlocks)) !---------------------------------------------------------------------- ! Generate the space-filling curve !---------------------------------------------------------------------- call GenSpaceCurve(Mesh) Mesh = Mesh + 1 ! make it 1-based indexing if(Debug) then if(POP_myTask ==0) call PrintCurve(Mesh) endif !----------------------------------------------- ! Reindex the SFC to address internal sub-blocks !----------------------------------------------- do j=1,curveSize do i=1,curveSize sfcNum = (Mesh(i,j) - 1)*(sb_x*sb_y) + 1 do jj=1,sb_y do ii=1,sb_x subNum = (jj-1)*sb_x + (ii-1) i2 = (i-1)*sb_x + ii j2 = (j-1)*sb_y + jj Mesh2(i2,j2) = sfcNum + subNum enddo enddo enddo enddo !------------------------------------------------ ! create a linear array of i,j coordinates of SFC !------------------------------------------------ idxT_i=0;idxT_j=0 do j=1,POP_numBlocksY do i=1,POP_numBlocksX n = (j-1)*POP_numBlocksX + i ig = Mesh2(i,j) if(workPerBlock(n) /= 0) then idxT_i(ig)=i;idxT_j(ig)=j endif enddo enddo !----------------------------- ! Compress out the land blocks !----------------------------- ii=0 do i=1,POP_numBlocks if(IdxT_i(i) .gt. 0) then ii=ii+1 Mesh3(idxT_i(i),idxT_j(i)) = ii endif enddo if(Debug) then if(POP_myTask==0) call PrintCurve(Mesh3) endif nblocks=ii nblocksL = nblocks/numProcs ! every cpu gets nblocksL blocks, but the first 'extra' get nblocksL+1 extra = mod(nblocks,numProcs) s1 = extra*(nblocksL+1) ! split curve into two curves: ! 1 ... s1 s2 ... nblocks ! ! s1 = extra*(nblocksL+1) (count be 0) ! s2 = s1+1 ! ! First region gets nblocksL+1 blocks per partition ! Second region gets nblocksL blocks per partition if(Debug) write(*,*) 'numProcs,extra,nblocks,nblocksL,s1: ', & numProcs,extra,nblocks,nblocksL,s1 do j=1,POP_numBlocksY do i=1,POP_numBlocksX n = (j-1)*POP_numblocksX + i ! i2 = idxT_i(n) ! j2 = idxT_j(n) ii = Mesh3(i,j) if(ii>0) then !DBG if(POP_myTask ==0) write(*,*) 'i,j,ii:= ',i,j,ii if(ii<=s1) then ii=ii-1 tmp1 = ii/(nblocksL+1) newDistrb%blockLocation(n) = tmp1+1 else ii=ii-s1-1 tmp1 = ii/nblocksL newDistrb%blockLocation(n) = extra + tmp1 + 1 endif endif enddo enddo !---------------------------------------------------------------------- ! Reset the newDistrb data structure !---------------------------------------------------------------------- allocate(proc_tmp(numProcs)) proc_tmp = 0 do n=1,POP_numBlocks pid = newDistrb%blockLocation(n) if(pid>0) then proc_tmp(pid) = proc_tmp(pid) + 1 newDistrb%blockLocalID(n) = proc_tmp(pid) endif enddo !--------------------------------------- ! Set the number of active local blocks !--------------------------------------- newDistrb%numLocalBlocks = proc_tmp(POP_myTask+1) if (newDistrb%numLocalBlocks > 0) then allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks)) ! if (istat > 0) then ! call POP_ErrorSet(errorCode, & ! 'POP_DistributionCreateSpaceCurve: error allocating blockLoc') ! return ! endif i=1 do n=1,POP_numBlocks pid = newDistrb%blockLocation(n) if(pid == POP_myTask+1) then newDistrb%blockGlobalID(i) = n ! should work by construction i=i+1 endif enddo endif if(Debug) then if(POP_myTask==0) write(*,*) 'newDistrb%proc:= ',newDistrb%blockLocation write(*,*) 'IAM: ',POP_myTask,' SpaceCurve: Number of blocks {total,local} :=', & POP_numBlocks,nblocks,proc_tmp(POP_myTask+1) endif deallocate(proc_tmp) deallocate(Mesh,Mesh2,Mesh3) deallocate(idxT_i,idxT_j) !---------------------------------------------------------------------- !EOC end function POP_DistributionCreateSpacecurv !********************************************************************** !BOP ! !IROUTINE: POP_DistributionCreateRake ! !INTERFACE: function POP_DistributionCreateRake(numProcs, workPerBlock, & errorCode) result(newDistrb) ! !DESCRIPTION: ! This function distributes blocks across processors in a ! load-balanced manner based on the amount of work per block. ! A rake algorithm is used in which the blocks are first distributed ! in a Cartesian distribution and then a rake is applied in each ! Cartesian direction. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (POP_i4), intent(in) :: & numProcs ! number of processors in this distribution integer (POP_i4), dimension(:), intent(in) :: & workPerBlock ! amount of work per block ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code type (POP_distrb) :: & newDistrb ! resulting structure describing ! load-balanced distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: & i,j,n ,&! dummy loop indices pid ,&! dummy for processor id istat ,&! status flag for allocates localBlock ,&! local block position on processor numOcnBlocks ,&! number of ocean blocks maxWork ,&! max amount of work in any block numProcsX ,&! num of procs in x for global domain numProcsY ! num of procs in y for global domain integer (POP_i4), dimension(:), allocatable :: & priority ,&! priority for moving blocks workTmp ,&! work per row or column for rake algrthm procTmp ! temp processor id for rake algrthm type (POP_distrb) :: dist ! temp hold distribution !---------------------------------------------------------------------- ! ! first set up as Cartesian distribution ! !---------------------------------------------------------------------- errorCode = POP_Success dist = POP_DistributionCreateCartesian(numProcs, & workPerBlock, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error creating initial dist') return endif !---------------------------------------------------------------------- ! ! if the number of blocks is close to the number of processors, ! only do a 1-d rake on the entire distribution ! !---------------------------------------------------------------------- numOcnBlocks = count (workPerBlock /= 0) maxWork = maxval(workPerBlock) if (numOcnBlocks <= 2*numProcs) then allocate(priority(POP_numBlocks), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating priority array') return endif !*** initialize priority array do j=1,POP_numBlocksY do i=1,POP_numBlocksX n=(j-1)*POP_numBlocksX + i if (workPerBlock(n) > 0) then priority(n) = maxWork + n - workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(numProcs), procTmp(numProcs), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating tmps') return endif workTmp(:) = 0 do i=1,numProcs procTmp(i) = i do n=1,POP_numBlocks if (dist%blockLocation(n) == i) then workTmp(i) = workTmp(i) + workPerBlock(n) endif end do end do call POP_DistributionRake (workTmp, procTmp, workPerBlock, & priority, dist, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error in linear rake') return endif deallocate(workTmp, procTmp, stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error deallocating tmps') return endif !---------------------------------------------------------------------- ! ! otherwise re-distribute blocks using a rake in each direction ! !---------------------------------------------------------------------- else call POP_DistributionProcDecomp(dist%numProcs, & numProcsX, numProcsY, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error in proc decomposition') return endif !---------------------------------------------------------------------- ! ! load-balance using a rake algorithm in the x-direction first ! !---------------------------------------------------------------------- allocate(priority(POP_numBlocks), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating priority array') return endif !*** set highest priority such that eastern-most blocks !*** and blocks with the least amount of work are !*** moved first do j=1,POP_numBlocksY do i=1,POP_numBlocksX n=(j-1)*POP_numBlocksX + i if (workPerBlock(n) > 0) then priority(n) = (maxWork + 1)*(POP_numBlocksX + i) - & workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(numProcsX), procTmp(numProcsX), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating tmps') return endif do j=1,numProcsY workTmp(:) = 0 do i=1,numProcsX pid = (j-1)*numProcsX + i procTmp(i) = pid do n=1,POP_numBlocks if (dist%blockLocation(n) == pid) then workTmp(i) = workTmp(i) + workPerBlock(n) endif end do end do call POP_DistributionRake (workTmp, procTmp, workPerBlock, & priority, dist, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error in first rake') return endif end do deallocate(workTmp, procTmp, stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error deallocating tmps') return endif !---------------------------------------------------------------------- ! ! use a rake algorithm in the y-direction now ! !---------------------------------------------------------------------- !*** set highest priority for northern-most blocks do j=1,POP_numBlocksY do i=1,POP_numBlocksX n=(j-1)*POP_numBlocksX + i if (workPerBlock(n) > 0) then priority(n) = (maxWork + 1)*(POP_numBlocksY + j) - & workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(numProcsY), procTmp(numProcsY), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating tmps 2') return endif do i=1,numProcsX workTmp(:) = 0 do j=1,numProcsY pid = (j-1)*numProcsX + i procTmp(j) = pid do n=1,POP_numBlocks if (dist%blockLocation(n) == pid) then workTmp(j) = workTmp(j) + workPerBlock(n) endif end do end do call POP_DistributionRake (workTmp, procTmp, workPerBlock, & priority, dist, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error in second rake') return endif end do deallocate(workTmp, procTmp, priority, stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error deallocating arrays') return endif endif ! 1d or 2d rake !---------------------------------------------------------------------- ! ! create new distribution with info extracted from the temporary ! distribution ! !---------------------------------------------------------------------- newDistrb%numProcs = numProcs newDistrb%communicator = dist%communicator allocate(newDistrb%blockLocation(POP_numBlocks), & newDistrb%blockLocalID(POP_numBlocks), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating blockLocation') return endif allocate(procTmp(numProcs), stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating last procTmp') return endif procTmp = 0 localBlock = 0 do n=1,POP_numBlocks pid = dist%blockLocation(n) ! processor id newDistrb%blockLocation(n) = pid if (pid > 0) then procTmp(pid) = procTmp(pid) + 1 newDistrb%blockLocalID (n) = procTmp(pid) else newDistrb%blockLocalID (n) = 0 endif end do newDistrb%numLocalBlocks = procTmp(POP_myTask+1) if (minval(procTmp) < 1) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: processors left with no blocks') return endif deallocate(procTmp, stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating last procTmp') return endif allocate(newDistrb%blockGlobalID(newDistrb%numLocalBlocks), & stat=istat) if (istat > 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error allocating blockGlobalID') return endif localBlock = 0 do n=1,POP_numBlocks if (newDistrb%blockLocation(n) == POP_myTask+1) then localBlock = localBlock + 1 newDistrb%blockGlobalID(localBlock) = n endif end do !---------------------------------------------------------------------- call POP_DistributionDestroy(dist, errorCode) if (errorCode /= POP_Success) then call POP_ErrorSet(errorCode, & 'POP_DistributionCreateRake: error destroying temp distrib') return endif !---------------------------------------------------------------------- !EOC end function POP_DistributionCreateRake !********************************************************************** !BOP ! !IROUTINE: POP_DistributionProcDecomp ! !INTERFACE: subroutine POP_DistributionProcDecomp(numProcs, & numProcsX, numProcsY, errorCode) ! !DESCRIPTION: ! This subroutine attempts to find an optimal (nearly square) ! 2d processor decomposition for a given number of processors. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (POP_i4), intent(in) :: & numProcs ! total number or processors ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & numProcsX, numProcsY, &! number of procs in each dimension errorCode ! returned error code !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: & iguess, jguess ! guesses for numProcsX,Y real (POP_r8) :: & squareRootN ! square root of numProcs !---------------------------------------------------------------------- ! ! start with an initial guess that is closest to square decomp ! !---------------------------------------------------------------------- squareRootN = sqrt(real(numProcs)) numProcsX = 0 numProcsY = 0 iguess = nint(squareRootN) !---------------------------------------------------------------------- ! ! try various decompositions to find the best ! !---------------------------------------------------------------------- procLoop: do jguess = numProcs/iguess if (iguess*jguess == numProcs) then ! valid decomp !*** !*** if the blocks can be evenly distributed, it is a !*** good decomposition !*** if (mod(POP_numBlocksX,iguess) == 0 .and. & mod(POP_numBlocksY,jguess) == 0) then numProcsX = iguess numProcsY = jguess exit procLoop !*** !*** if the blocks can be evenly distributed in a !*** transposed direction, it is a good decomposition !*** else if (mod(POP_numBlocksX,jguess) == 0 .and. & mod(POP_numBlocksY,iguess) == 0) then numProcsX = jguess numProcsY = iguess exit procLoop !*** !*** A valid decomposition, but keep searching for !*** a better one !*** else if (numProcsX == 0) then numProcsX = iguess numProcsY = jguess endif iguess = iguess - 1 if (iguess == 0) then exit procLoop else cycle procLoop endif endif else ! invalid decomp - keep trying iguess = iguess - 1 if (iguess == 0) then exit procLoop else cycle procLoop endif endif end do procLoop if (numProcsX == 0) then call POP_ErrorSet(errorCode, & 'POP_DistributionProcDecomp: Unable to find 2d config') return endif !---------------------------------------------------------------------- !EOC end subroutine POP_DistributionProcDecomp !********************************************************************** !BOP ! !IROUTINE: POP_DistributionRake ! !INTERFACE: subroutine POP_DistributionRake (procWork, procID, blockWork, & priority, distribution, errorCode) ! !DESCRIPTION: ! This subroutine performs a rake algorithm to distribute the work ! along a vector of processors. In the rake algorithm, a work ! threshold is first set. Then, moving from left to right, work ! above that threshold is raked to the next processor in line. ! The process continues until the end of the vector is reached ! and then the threshold is reduced by one for a second rake pass. ! In this implementation, a priority for moving blocks is defined ! such that the rake algorithm chooses the highest priority ! block to be moved to the next processor. This can be used ! for example to always choose the eastern-most block or to ! ensure a block does not stray too far from its neighbors. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (POP_i4), intent(in), dimension(:) :: & blockWork ,&! amount of work per block procID ! global processor number ! !INPUT/OUTPUT PARAMETERS: integer (POP_i4), intent(inout), dimension(:) :: & procWork ,&! amount of work per processor priority ! priority for moving a given block type (POP_distrb), intent(inout) :: & distribution ! distribution to change ! !OUTPUT PARAMETERS: integer (POP_i4), intent(out) :: & errorCode ! returned error code !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (POP_i4) :: & i, n, &! dummy loop indices np1, &! n+1 corrected for cyclical wrap iproc, inext, &! processor ids for current and next numProcs, numBlocks, &! number of blocks, processors lastPriority, &! priority for most recent block minPriority, &! minimum priority lastLoc, &! location for most recent block meanWork, maxWork, &! mean,max work per processor diffWork, residual, &! work differences and residual work numTransfers ! counter for number of block transfers !---------------------------------------------------------------------- ! ! initialization ! !---------------------------------------------------------------------- errorCode = POP_Success numProcs = size(procWork) numBlocks = size(blockWork) !*** compute mean,max work per processor meanWork = sum(procWork)/numProcs + 1 maxWork = maxval(procWork) residual = mod(meanWork,numProcs) minPriority = 1000000 do n=1,numProcs iproc = procID(n) do i=1,numBlocks if (distribution%blockLocation(i) == iproc) then minPriority = min(minPriority,priority(i)) endif end do end do !---------------------------------------------------------------------- ! ! do two sets of transfers ! !---------------------------------------------------------------------- transferLoop: do !---------------------------------------------------------------------- ! ! do rake across the processors ! !---------------------------------------------------------------------- numTransfers = 0 do n=1,numProcs if (n < numProcs) then np1 = n+1 else np1 = 1 endif iproc = procID(n) inext = procID(np1) if (procWork(n) > meanWork) then !*** pass work to next diffWork = procWork(n) - meanWork rake1: do while (diffWork > 1) !*** attempt to find a block with the required !*** amount of work and with the highest priority !*** for moving (eg boundary blocks first) lastPriority = 0 lastLoc = 0 do i=1,numBlocks if (distribution%blockLocation(i) == iproc) then if (priority(i) > lastPriority ) then lastPriority = priority(i) lastLoc = i endif endif end do if (lastLoc == 0) exit rake1 ! could not shift work numTransfers = numTransfers + 1 distribution%blockLocation(lastLoc) = inext if (np1 == 1) priority(lastLoc) = minPriority diffWork = diffWork - blockWork(lastLoc) procWork(n ) = procWork(n )-blockWork(lastLoc) procWork(np1) = procWork(np1)+blockWork(lastLoc) end do rake1 endif end do !---------------------------------------------------------------------- ! ! increment meanWork by one and repeat ! !---------------------------------------------------------------------- meanWork = meanWork + 1 if (numTransfers == 0 .or. meanWork > maxWork) exit transferLoop end do transferLoop !---------------------------------------------------------------------- !EOC end subroutine POP_DistributionRake !*********************************************************************** end module POP_DistributionMod !|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||