#include "ESMFPIO.h" !=================================================== ! DO NOT EDIT THIS FILE, it was generated using genf90.pl ! Any changes you make to this file may be lost !=================================================== #define __PIO_FILE__ "box_rearrange.F90" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! Module box_rearrange ! ! Perform data rearrangement with each io processor ! owning a rectangular box in the output domain ! ! ! 20070726 Initial version - R. Loy ! 20070807 Improved way MPI is called - R. Loy ! 20070825 fix hardcoded dim and unintentionally templated int - R. Loy ! 20071111 cache rearranger setup (1st and 2nd communications) ! 20090512 added flow-control logic option to comp2io and io2comp ! (imported flow-controlled alltoall logic ("swapm") from ! Community Atmosphere Model) - P. Worley ! 20100207 added flow-control logic option to box_rearrange_create ! - P. Worley !#define DEBUG 0 !#define DEBUG_INDICES 0 !#define DEBUG_BARRIER 0 ! cache communication pattern for rearranger in the ioDesc #define BOX_CACHE 1 ! communication algorithm options #define COLLECTIVE 0 #define POINT_TO_POINT 1 #define FLOW_CONTROL 2 #define DEF_P2P_HANDSHAKE .true. #define DEF_P2P_ISEND .false. #define DEF_P2P_MAXREQ 64 #ifndef _MPISERIAL #ifdef _USE_FLOW_CONTROL #define _USE_COMP2IO_FC 1 #define _USE_IO2COMP_FC 1 #define _USE_CREATE_FC 1 #endif #endif ! ! The completely unreadable nature of the following lines is required by some compilers ! #ifdef _USE_ALLTOALLW #define DEF_COMP2IO_OPTION 0 #define DEF_IO2COMP_OPTION 0 #else #ifdef _USE_COMP2IO_FC #define DEF_COMP2IO_OPTION 2 #else #define DEF_COMP2IO_OPTION 1 #endif #ifdef _USE_IO2COMP_FC #define DEF_IO2COMP_OPTION 2 #else #define DEF_IO2COMP_OPTION 1 #endif #endif ! ! USESPACE ! By default, use an algorithm on the senders that uses ! very little temp space but needs to make one pass through ! the compdof destinations for each io proc. If _USE_SPACE ! is nonzero, enable an alternate algorithm that makes a ! single pass but needs a lot of temp space to store the ! results ! ! NOTE: code change caching is allocating anyway so do not shut off #define _USE_SPACE 1 !#undef _USE_SPACE ! ! arbitrary mpi message tags used for the rearrange ! #define TAG0 100 #define TAG1 101 #define TAG2 102 module box_rearrange #ifdef _USEBOX use pio_kinds use pio_types, only : io_desc_t, iosystem_desc_t #ifdef NO_MPI2 use pio_support, only : MPI_TYPE_CREATE_INDEXED_BLOCK, piodie, & Debug, DebugIO, CheckMPIReturn, pio_fc_gather_int #else use pio_support, only : piodie, Debug, DebugIO, CheckMPIReturn, & pio_fc_gather_int #endif use alloc_mod use pio_spmd_utils implicit none private include 'mpif.h' ! _EXTERNAL public :: box_rearrange_create, & box_rearrange_free, & box_rearrange_comp2io, & box_rearrange_io2comp # 108 "box_rearrange.F90.in" interface box_rearrange_comp2io ! TYPE int,real,double module procedure box_rearrange_comp2io_int ! TYPE int,real,double module procedure box_rearrange_comp2io_real ! TYPE int,real,double module procedure box_rearrange_comp2io_double end interface # 113 "box_rearrange.F90.in" interface box_rearrange_io2comp ! TYPE int,real,double module procedure box_rearrange_io2comp_int ! TYPE int,real,double module procedure box_rearrange_io2comp_real ! TYPE int,real,double module procedure box_rearrange_io2comp_double end interface character(len=*), parameter :: modName='box_rearrange' # 120 "box_rearrange.F90.in" contains #ifdef _MPISERIAL ! ! box_rearrange.inc ! ! these are the templated routines for the box_rearrange.F90 module ! ! TYPE real,double,int !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_comp2io_real ! ! rearrange an integer compdof ! ! Note use of ioDesc instead of varDesc ! ! This version uses mpi types to pack the data. There is extra ! work and additional communication to set this up but it should ! be more efficient overall. ! ! TYPE real,double,int # 141 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_real (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r4), intent(in) :: src(s1) real(r4), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_real' integer :: ndof integer :: niodof integer :: num_tasks integer :: num_iotasks integer :: i integer :: ioproc,ioindex ! begin ndof = size(ioDesc%dest_ioindex) niodof = size(dest) num_tasks = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (num_tasks /= 1 .or. num_iotasks /= 1) & call piodie( __PIO_FILE__,__LINE__, & 'built with -D_MPISERIAL but num_tasks=', num_tasks, & 'num_iotasks=', num_iotasks ) if (size(src)>0 .and. size(src)< ndof) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: size(compbuf)=', size(src), & ' not equal to size(compdof)=', ndof ) do i=1,ndof ioproc = ioDesc%dest_ioproc(i) ioindex = ioDesc%dest_ioindex(i) if (ioproc /= -1 ) then ! ignore sender hole if (ioproc /= 1) & ! ioproc is 1-based call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioproc(i)=', ioproc ) if ( ioindex<0 .or. ioindex>=niodof ) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioindex(i) out of range=', ioindex ) dest(ioindex+1) = src(i) ! ioindex is 0-based endif end do end subroutine box_rearrange_comp2io_real ! ! box_rearrange.inc ! ! these are the templated routines for the box_rearrange.F90 module ! ! TYPE real,double,int !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_comp2io_double ! ! rearrange an integer compdof ! ! Note use of ioDesc instead of varDesc ! ! This version uses mpi types to pack the data. There is extra ! work and additional communication to set this up but it should ! be more efficient overall. ! ! TYPE real,double,int # 141 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_double (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r8), intent(in) :: src(s1) real(r8), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_double' integer :: ndof integer :: niodof integer :: num_tasks integer :: num_iotasks integer :: i integer :: ioproc,ioindex ! begin ndof = size(ioDesc%dest_ioindex) niodof = size(dest) num_tasks = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (num_tasks /= 1 .or. num_iotasks /= 1) & call piodie( __PIO_FILE__,__LINE__, & 'built with -D_MPISERIAL but num_tasks=', num_tasks, & 'num_iotasks=', num_iotasks ) if (size(src)>0 .and. size(src)< ndof) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: size(compbuf)=', size(src), & ' not equal to size(compdof)=', ndof ) do i=1,ndof ioproc = ioDesc%dest_ioproc(i) ioindex = ioDesc%dest_ioindex(i) if (ioproc /= -1 ) then ! ignore sender hole if (ioproc /= 1) & ! ioproc is 1-based call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioproc(i)=', ioproc ) if ( ioindex<0 .or. ioindex>=niodof ) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioindex(i) out of range=', ioindex ) dest(ioindex+1) = src(i) ! ioindex is 0-based endif end do end subroutine box_rearrange_comp2io_double ! ! box_rearrange.inc ! ! these are the templated routines for the box_rearrange.F90 module ! ! TYPE real,double,int !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_comp2io_int ! ! rearrange an integer compdof ! ! Note use of ioDesc instead of varDesc ! ! This version uses mpi types to pack the data. There is extra ! work and additional communication to set this up but it should ! be more efficient overall. ! ! TYPE real,double,int # 141 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_int (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 integer(i4), intent(in) :: src(s1) integer(i4), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_int' integer :: ndof integer :: niodof integer :: num_tasks integer :: num_iotasks integer :: i integer :: ioproc,ioindex ! begin ndof = size(ioDesc%dest_ioindex) niodof = size(dest) num_tasks = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (num_tasks /= 1 .or. num_iotasks /= 1) & call piodie( __PIO_FILE__,__LINE__, & 'built with -D_MPISERIAL but num_tasks=', num_tasks, & 'num_iotasks=', num_iotasks ) if (size(src)>0 .and. size(src)< ndof) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: size(compbuf)=', size(src), & ' not equal to size(compdof)=', ndof ) do i=1,ndof ioproc = ioDesc%dest_ioproc(i) ioindex = ioDesc%dest_ioindex(i) if (ioproc /= -1 ) then ! ignore sender hole if (ioproc /= 1) & ! ioproc is 1-based call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioproc(i)=', ioproc ) if ( ioindex<0 .or. ioindex>=niodof ) & call piodie( __PIO_FILE__,__LINE__, & 'box_rearrange_comp2io: i=', i, & 'dest_ioindex(i) out of range=', ioindex ) dest(ioindex+1) = src(i) ! ioindex is 0-based endif end do end subroutine box_rearrange_comp2io_int #else /* not _MPISERIAL */ ! TYPE real,double,int # 208 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_real (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r4), intent(in) :: src(s1) real(r4), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_real' integer :: pio_option logical :: pio_hs logical :: pio_isend integer :: pio_maxreq integer :: ndof integer :: niodof integer :: num_iotasks integer :: nrecvs integer :: i integer :: ierror integer :: io_comprank integer :: myrank integer :: nprocs integer :: status(MPI_STATUS_SIZE) integer,pointer :: rfrom(:) ! rank of ith sender to this ioproc integer,pointer :: rtype(:) integer,pointer :: scount(:) integer,pointer :: stype(:) integer :: from integer,pointer :: a2a_displs(:) integer,pointer :: a2a_sendcounts(:) integer,pointer :: a2a_sendtypes(:) integer,pointer :: a2a_recvcounts(:) integer,pointer :: a2a_recvtypes(:) integer,pointer :: sreq(:) integer,pointer :: rreq(:) ! receive requests ! begin if ( present( comm_option ) ) then if ((comm_option == COLLECTIVE) & .or. (comm_option == POINT_TO_POINT) & .or. (comm_option == FLOW_CONTROL)) then pio_option = comm_option endif else pio_option = DEF_COMP2IO_OPTION endif if (pio_option == FLOW_CONTROL) then pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ if ( present(fc_options) ) then if (fc_options(1) == 0) then pio_hs = .false. endif if (fc_options(2) == 1) then pio_isend = .true. endif if (fc_options(3) >=-1) then pio_maxreq = fc_options(3) endif endif endif ndof = size(ioDesc%dest_ioindex) niodof = size(dest) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(src) > 0 .and. size(src) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do if (IOsystem%IOproc) then do i=1,nrecvs from = rfrom(i)+1 ! array is 1-based a2a_recvcounts(from) = 1 a2a_recvtypes(from) = rtype(i) end do endif do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) + 1 ! array is 1-based a2a_sendcounts(io_comprank) = 1 a2a_sendtypes(io_comprank) = stype(i) endif end do #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn('box_rearrange', ierror) else #endif call pio_swapm( nprocs, myrank, & src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(sreq, num_iotasks, 'send requests') #ifdef DEBUG if (myrank==0) then print *,'comp2io using cached rearranger info' endif #endif ! ! send data from comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) if(Debug) print *, __PIO_FILE__,__LINE__,myrank,': send posted dest=',io_comprank,' count=',scount(i), stype(i) call MPI_ISEND( src, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do ! ! post receives on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_IRECV( dest,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! source, tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) end do endif ! ! finish up ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) end do call dealloc_check(rreq, 'receive requests') endif do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do call dealloc_check(sreq, 'send requests') #if DEBUG_BARRIER call MPI_BARRIER(IOsystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) if (myrank==0) print *,'BARRIER - end of comp2io' #endif endif ! POINT_TO_POINT end subroutine box_rearrange_comp2io_real ! TYPE real,double,int # 208 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_double (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r8), intent(in) :: src(s1) real(r8), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_double' integer :: pio_option logical :: pio_hs logical :: pio_isend integer :: pio_maxreq integer :: ndof integer :: niodof integer :: num_iotasks integer :: nrecvs integer :: i integer :: ierror integer :: io_comprank integer :: myrank integer :: nprocs integer :: status(MPI_STATUS_SIZE) integer,pointer :: rfrom(:) ! rank of ith sender to this ioproc integer,pointer :: rtype(:) integer,pointer :: scount(:) integer,pointer :: stype(:) integer :: from integer,pointer :: a2a_displs(:) integer,pointer :: a2a_sendcounts(:) integer,pointer :: a2a_sendtypes(:) integer,pointer :: a2a_recvcounts(:) integer,pointer :: a2a_recvtypes(:) integer,pointer :: sreq(:) integer,pointer :: rreq(:) ! receive requests ! begin if ( present( comm_option ) ) then if ((comm_option == COLLECTIVE) & .or. (comm_option == POINT_TO_POINT) & .or. (comm_option == FLOW_CONTROL)) then pio_option = comm_option endif else pio_option = DEF_COMP2IO_OPTION endif if (pio_option == FLOW_CONTROL) then pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ if ( present(fc_options) ) then if (fc_options(1) == 0) then pio_hs = .false. endif if (fc_options(2) == 1) then pio_isend = .true. endif if (fc_options(3) >=-1) then pio_maxreq = fc_options(3) endif endif endif ndof = size(ioDesc%dest_ioindex) niodof = size(dest) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(src) > 0 .and. size(src) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do if (IOsystem%IOproc) then do i=1,nrecvs from = rfrom(i)+1 ! array is 1-based a2a_recvcounts(from) = 1 a2a_recvtypes(from) = rtype(i) end do endif do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) + 1 ! array is 1-based a2a_sendcounts(io_comprank) = 1 a2a_sendtypes(io_comprank) = stype(i) endif end do #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn('box_rearrange', ierror) else #endif call pio_swapm( nprocs, myrank, & src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(sreq, num_iotasks, 'send requests') #ifdef DEBUG if (myrank==0) then print *,'comp2io using cached rearranger info' endif #endif ! ! send data from comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) if(Debug) print *, __PIO_FILE__,__LINE__,myrank,': send posted dest=',io_comprank,' count=',scount(i), stype(i) call MPI_ISEND( src, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do ! ! post receives on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_IRECV( dest,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! source, tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) end do endif ! ! finish up ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) end do call dealloc_check(rreq, 'receive requests') endif do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do call dealloc_check(sreq, 'send requests') #if DEBUG_BARRIER call MPI_BARRIER(IOsystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) if (myrank==0) print *,'BARRIER - end of comp2io' #endif endif ! POINT_TO_POINT end subroutine box_rearrange_comp2io_double ! TYPE real,double,int # 208 "box_rearrange.F90.in" subroutine box_rearrange_comp2io_int (IOsystem, ioDesc, s1, src, s2, & dest, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 integer(i4), intent(in) :: src(s1) integer(i4), intent(out) :: dest(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! 1: handshake (0/false,1/true) ! 2: send (0) vs isend (1) ! 3: max number of outstanding requests ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_comp2io_int' integer :: pio_option logical :: pio_hs logical :: pio_isend integer :: pio_maxreq integer :: ndof integer :: niodof integer :: num_iotasks integer :: nrecvs integer :: i integer :: ierror integer :: io_comprank integer :: myrank integer :: nprocs integer :: status(MPI_STATUS_SIZE) integer,pointer :: rfrom(:) ! rank of ith sender to this ioproc integer,pointer :: rtype(:) integer,pointer :: scount(:) integer,pointer :: stype(:) integer :: from integer,pointer :: a2a_displs(:) integer,pointer :: a2a_sendcounts(:) integer,pointer :: a2a_sendtypes(:) integer,pointer :: a2a_recvcounts(:) integer,pointer :: a2a_recvtypes(:) integer,pointer :: sreq(:) integer,pointer :: rreq(:) ! receive requests ! begin if ( present( comm_option ) ) then if ((comm_option == COLLECTIVE) & .or. (comm_option == POINT_TO_POINT) & .or. (comm_option == FLOW_CONTROL)) then pio_option = comm_option endif else pio_option = DEF_COMP2IO_OPTION endif if (pio_option == FLOW_CONTROL) then pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ if ( present(fc_options) ) then if (fc_options(1) == 0) then pio_hs = .false. endif if (fc_options(2) == 1) then pio_isend = .true. endif if (fc_options(3) >=-1) then pio_maxreq = fc_options(3) endif endif endif ndof = size(ioDesc%dest_ioindex) niodof = size(dest) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(src) > 0 .and. size(src) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do if (IOsystem%IOproc) then do i=1,nrecvs from = rfrom(i)+1 ! array is 1-based a2a_recvcounts(from) = 1 a2a_recvtypes(from) = rtype(i) end do endif do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) + 1 ! array is 1-based a2a_sendcounts(io_comprank) = 1 a2a_sendtypes(io_comprank) = stype(i) endif end do #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn('box_rearrange', ierror) else #endif call pio_swapm( nprocs, myrank, & src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(sreq, num_iotasks, 'send requests') #ifdef DEBUG if (myrank==0) then print *,'comp2io using cached rearranger info' endif #endif ! ! send data from comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) if(Debug) print *, __PIO_FILE__,__LINE__,myrank,': send posted dest=',io_comprank,' count=',scount(i), stype(i) call MPI_ISEND( src, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do ! ! post receives on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_IRECV( dest,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! source, tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn('box_rearrange',ierror) end do endif ! ! finish up ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) end do call dealloc_check(rreq, 'receive requests') endif do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn('box_rearrange',ierror) endif end do call dealloc_check(sreq, 'send requests') #if DEBUG_BARRIER call MPI_BARRIER(IOsystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) if (myrank==0) print *,'BARRIER - end of comp2io' #endif endif ! POINT_TO_POINT end subroutine box_rearrange_comp2io_int #endif /* not _MPISERIAL */ #ifdef _MPISERIAL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_io2comp_real ! ! rearrange from the io decomposition to the comp decomposition ! ! TYPE real,double,int # 444 "box_rearrange.F90.in" subroutine box_rearrange_io2comp_real (IOsystem, ioDesc, s1, iobuf, & s2, compbuf, comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r4), intent(in) :: iobuf(s1) real(r4), intent(out) :: compbuf(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_io2comp_real' integer :: ndof integer :: niodof integer :: num_tasks integer :: num_iotasks integer :: i integer :: ioproc, ioindex ! begin compbuf(:) = 0 ndof = size(iodesc%dest_ioindex) niodof = size(iobuf) num_tasks = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (num_tasks /= 1 .or. num_iotasks /= 1) & call piodie( __PIO_FILE__,__LINE__, & 'built with -D_MPISERIAL but num_tasks=', num_tasks, & 'num_iotasks=', num_iotasks ) if (size(compbuf) > 0 .and. size(compbuf)=-1) then pio_maxreq = fc_options(3) endif endif endif compbuf(:) = 0 ndof = size(iodesc%dest_ioindex) niodof = size(iobuf) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(compbuf) > 0 .and. size(compbuf) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) +1 ! array is 1-based a2a_recvcounts(io_comprank) = 1 a2a_recvtypes(io_comprank) = stype(i) endif end do if (IOsystem%IOproc) then do i=1,nrecvs comprank = rfrom(i) +1 ! array is 1-based a2a_sendcounts(comprank) = 1 a2a_sendtypes(comprank) = rtype(i) end do endif #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn(subName, ierror) else #endif call pio_swapm( nprocs, myrank, & iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(rreq, num_iotasks, 'recv requests') ! ! post receives on comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) call MPI_IRECV( compbuf, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn(subName,ierror) endif end do ! ! do sends on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_ISEND( iobuf,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! dest, tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn(subName,ierror) end do endif ! ! finish up ! do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) endif end do call dealloc_check(rreq,'recv requests') if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) end do call dealloc_check(sreq,'send requests') endif endif ! POINT_TO_POINT end subroutine box_rearrange_io2comp_real ! TYPE real,double,int # 508 "box_rearrange.F90.in" subroutine box_rearrange_io2comp_double (IOsystem,ioDesc,s1, iobuf,s2, compbuf, & comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 real(r8), intent(in) :: iobuf(s1) real(r8), intent(out) :: compbuf(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_io2comp_double' integer :: pio_option logical :: pio_hs logical :: pio_isend integer :: pio_maxreq integer :: ndof integer :: niodof integer :: num_iotasks integer :: nrecvs integer :: i integer :: ierror integer :: io_comprank integer :: myrank integer :: comprank integer :: nprocs integer :: status(MPI_STATUS_SIZE) integer,pointer :: rfrom(:) ! rank of ith sender to this ioproc integer,pointer :: rtype(:) ! mpi receive types integer,pointer :: scount(:) ! scount(i) = no. sends to ith ioproc integer,pointer :: stype(:) ! mpi send types integer,pointer :: a2a_displs(:) integer,pointer :: a2a_sendcounts(:) integer,pointer :: a2a_sendtypes(:) integer,pointer :: a2a_recvcounts(:) integer,pointer :: a2a_recvtypes(:) integer,pointer :: sreq(:) integer,pointer :: rreq(:) ! receive requests for comp procs ! begin if ( present( comm_option ) ) then if ((comm_option == COLLECTIVE) & .or. (comm_option == POINT_TO_POINT) & .or. (comm_option == FLOW_CONTROL)) then pio_option = comm_option endif else pio_option = DEF_IO2COMP_OPTION endif if (pio_option == FLOW_CONTROL) then pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ if ( present(fc_options) ) then if (fc_options(1) == 0) then pio_hs = .false. endif if (fc_options(2) == 1) then pio_isend = .true. endif if (fc_options(3) >=-1) then pio_maxreq = fc_options(3) endif endif endif compbuf(:) = 0 ndof = size(iodesc%dest_ioindex) niodof = size(iobuf) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(compbuf) > 0 .and. size(compbuf) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) +1 ! array is 1-based a2a_recvcounts(io_comprank) = 1 a2a_recvtypes(io_comprank) = stype(i) endif end do if (IOsystem%IOproc) then do i=1,nrecvs comprank = rfrom(i) +1 ! array is 1-based a2a_sendcounts(comprank) = 1 a2a_sendtypes(comprank) = rtype(i) end do endif #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn(subName, ierror) else #endif call pio_swapm( nprocs, myrank, & iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(rreq, num_iotasks, 'recv requests') ! ! post receives on comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) call MPI_IRECV( compbuf, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn(subName,ierror) endif end do ! ! do sends on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_ISEND( iobuf,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! dest, tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn(subName,ierror) end do endif ! ! finish up ! do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) endif end do call dealloc_check(rreq,'recv requests') if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) end do call dealloc_check(sreq,'send requests') endif endif ! POINT_TO_POINT end subroutine box_rearrange_io2comp_double ! TYPE real,double,int # 508 "box_rearrange.F90.in" subroutine box_rearrange_io2comp_int (IOsystem,ioDesc,s1, iobuf,s2, compbuf, & comm_option, fc_options) implicit none type (IOsystem_desc_t), intent(inout) :: IOsystem type (IO_desc_t) :: ioDesc integer, intent(in) :: s1, s2 integer(i4), intent(in) :: iobuf(s1) integer(i4), intent(out) :: compbuf(s2) integer, optional, intent(in) :: comm_option integer, optional, intent(in) :: fc_options(3) ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_io2comp_int' integer :: pio_option logical :: pio_hs logical :: pio_isend integer :: pio_maxreq integer :: ndof integer :: niodof integer :: num_iotasks integer :: nrecvs integer :: i integer :: ierror integer :: io_comprank integer :: myrank integer :: comprank integer :: nprocs integer :: status(MPI_STATUS_SIZE) integer,pointer :: rfrom(:) ! rank of ith sender to this ioproc integer,pointer :: rtype(:) ! mpi receive types integer,pointer :: scount(:) ! scount(i) = no. sends to ith ioproc integer,pointer :: stype(:) ! mpi send types integer,pointer :: a2a_displs(:) integer,pointer :: a2a_sendcounts(:) integer,pointer :: a2a_sendtypes(:) integer,pointer :: a2a_recvcounts(:) integer,pointer :: a2a_recvtypes(:) integer,pointer :: sreq(:) integer,pointer :: rreq(:) ! receive requests for comp procs ! begin if ( present( comm_option ) ) then if ((comm_option == COLLECTIVE) & .or. (comm_option == POINT_TO_POINT) & .or. (comm_option == FLOW_CONTROL)) then pio_option = comm_option endif else pio_option = DEF_IO2COMP_OPTION endif if (pio_option == FLOW_CONTROL) then pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ if ( present(fc_options) ) then if (fc_options(1) == 0) then pio_hs = .false. endif if (fc_options(2) == 1) then pio_isend = .true. endif if (fc_options(3) >=-1) then pio_maxreq = fc_options(3) endif endif endif compbuf(:) = 0 ndof = size(iodesc%dest_ioindex) niodof = size(iobuf) nrecvs = ioDesc%nrecvs ! number of distinct senders to the ioproc myrank = IOsystem%union_rank nprocs = IOsystem%num_tasks num_iotasks = IOsystem%num_iotasks if (size(compbuf) > 0 .and. size(compbuf) ioDesc%scount stype => ioDesc%stype if (pio_option /= POINT_TO_POINT) then call alloc_check(a2a_sendcounts, nprocs) call alloc_check(a2a_displs, nprocs) call alloc_check(a2a_sendtypes, nprocs) call alloc_check(a2a_recvcounts, nprocs) call alloc_check(a2a_recvtypes, nprocs) do i=1,nprocs a2a_displs(i) = 0 a2a_sendcounts(i) = 0 a2a_sendtypes(i) = MPI_INTEGER a2a_recvcounts(i) = 0 a2a_recvtypes(i) = MPI_INTEGER end do do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank = find_io_comprank(IOsystem,i) +1 ! array is 1-based a2a_recvcounts(io_comprank) = 1 a2a_recvtypes(io_comprank) = stype(i) endif end do if (IOsystem%IOproc) then do i=1,nrecvs comprank = rfrom(i) +1 ! array is 1-based a2a_sendcounts(comprank) = 1 a2a_sendtypes(comprank) = rtype(i) end do endif #ifdef _USE_ALLTOALLW if (pio_option == COLLECTIVE) then call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) call CheckMPIReturn(subName, ierror) else #endif call pio_swapm( nprocs, myrank, & iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) #ifdef _USE_ALLTOALLW endif #endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) call dealloc_check(a2a_sendtypes) call dealloc_check(a2a_recvcounts) call dealloc_check(a2a_recvtypes) else call alloc_check(rreq, num_iotasks, 'recv requests') ! ! post receives on comp procs ! do i=1,num_iotasks if (scount(i) /= 0) then ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(IOsystem,i) call MPI_IRECV( compbuf, 1, stype(i), & ! buf, count, type io_comprank,TAG2, & ! destination,tag IOsystem%union_comm,rreq(i),ierror ) call CheckMPIReturn(subName,ierror) endif end do ! ! do sends on io procs ! if (IOsystem%IOproc) then do i=1,nrecvs call MPI_ISEND( iobuf,1, rtype(i), & ! buf, count, type rfrom(i), TAG2, & ! dest, tag IOsystem%union_comm,sreq(i),ierror ) call CheckMPIReturn(subName,ierror) end do endif ! ! finish up ! do i=1,num_iotasks if (scount(i) /= 0) then call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) endif end do call dealloc_check(rreq,'recv requests') if (IOsystem%IOproc) then do i=1,nrecvs call MPI_WAIT( sreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) end do call dealloc_check(sreq,'send requests') endif endif ! POINT_TO_POINT end subroutine box_rearrange_io2comp_int #endif /* not _MPISERIAL */ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! io_comprank ! ! find the rank in union_comm of the ith io processor ! # 734 "box_rearrange.F90.in" integer function find_io_comprank( Iosystem, ioprocindex ) implicit none type (Iosystem_desc_t), intent(in) :: Iosystem integer ioprocindex find_io_comprank=iosystem%ioranks(ioprocindex) end function find_io_comprank !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! gindex_to_coord ! ! find global xyz coordinates given a global index ! # 750 "box_rearrange.F90.in" subroutine gindex_to_coord( gindex, gstride, ndim, gcoord ) implicit none integer,intent(in) :: gindex ! 0-based global index integer,intent(in) :: gstride(:) ! stride for each dimension ! e.g. (nx,nx*ny,nx*ny*nz) integer,intent(in) :: ndim ! number of dimesions e.g. 2 or 3 integer,intent(out) :: gcoord(:) ! output global coords (0-based) ! local vars character(len=*), parameter :: subName=modName//'::gindex_to_coord' integer i integer tempindex ! loop outermost to innermost e.g. z,y,x tempindex=gindex do i=ndim,2,-1 gcoord(i) = tempindex/gstride(i-1) ! integer division tempindex = tempindex - gcoord(i)*gstride(i-1) ! remainder end do ! base case - innermost dimension gcoord(1) = tempindex end subroutine gindex_to_coord !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! find_ioproc ! ! determine if a coordinate is in any ioproc's box '! extra apostrophy added for cpp ! if so, return a 1-based ioproc number ! and 1-based index for that ioproc's iobuf ' ! # 786 "box_rearrange.F90.in" logical function find_ioproc( gcoord, lb, ub, lstride, ndim, nioproc, & io_proc, io_index ) implicit none integer,intent(in) :: gcoord(:) integer,intent(in) :: ndim integer,intent(in) :: nioproc integer,intent(in) :: lb(ndim,nioproc) integer,intent(in) :: ub(ndim,nioproc) integer,intent(in) :: lstride(ndim,nioproc) integer,intent(out) :: io_proc integer,intent(out) :: io_index character(len=*), parameter :: subName=modName//'::find_ioproc' integer i,j logical found integer :: lcoord(ndim) integer lindex found = .false. io_proc = -1 io_index = -1 loop_ioproc: do i=1,nioproc do j=1,ndim if ( gcoord(j) < lb(j,i) .or. & gcoord(j) >= ub(j,i) ) then cycle loop_ioproc endif end do ! gcoord matches this box if (found) then call piodie(__PIO_FILE__,__LINE__, 'multiple matches') endif found = .true. io_proc = i ! 1-based here exit loop_ioproc end do loop_ioproc find_ioproc = found if (found) then ! find location within the ioproc's box ' do i=1,ndim lcoord(i) = gcoord(i)-lb(i,io_proc) end do ! find index into ioproc's buffer ' lindex = lcoord(1) do i=2,ndim lindex = lindex+lcoord(i)*lstride(i-1,io_proc) end do ! io_index=lindex+1 ! convert to 1-based io_index = lindex ! 0-based endif end function find_ioproc !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! compute_dest ! ! compute destination ioproc and index for every compdof ! ! # 857 "box_rearrange.F90.in" subroutine compute_dest(compdof, start, count, gsize, ndim, nioproc, & dest_ioproc, dest_ioindex ) implicit none integer, intent(in) :: compdof(:) integer, intent(in) :: start(:,:) ! start(ndim,nioproc) integer, intent(in) :: count(:,:) ! count(ndim,nioproc) integer, intent(in) :: gsize(:) ! global domain size gsize(ndim) integer, intent(in) :: ndim integer, intent(in) :: nioproc integer, intent(out) :: dest_ioproc(:) ! ioproc number to send to integer, intent(out) :: dest_ioindex(:) ! index in iobuf on that ioproc ! local vars character(len=*), parameter :: subName=modName//'::compute_dest' integer i,j integer ndof integer gindex integer lb(ndim,nioproc) ! 0-based lower bound of boxes integer ub(ndim,nioproc) ! 0-based upper bound of boxes integer gcoord(ndim) ! 0-based xyz coordinates integer gstride(ndim) ! stride for each dimension integer lstride(ndim,nioproc) ! stride for each dim on each ioprocs integer ioproc, ioindex ! compute 0-based start array do i=1,nioproc do j=1,ndim ! rml fix 3->ndim lb(j,i) = start(j,i)-1 ub(j,i) = lb(j,i)+count(j,i) end do end do ! compute stride for each dimension of array ! e.g. (NX,NX*NY,NX*NY*NZ) gstride(1) = gsize(1) ! innermost dimension do i=2,ndim gstride(i) = gsize(i)*gstride(i-1) end do do i=1,nioproc ! loop over all io boxes lstride(1,i) = count(1,i) ! innermost dimension do j=2,ndim lstride(j,i) = count(j,i)*lstride(j-1,i) end do end do ndof=size(compdof) if(Debug) print *,__PIO_FILE__,__LINE__,minval(compdof), maxval(compdof) do i=1,ndof ! Compute global coordinates for compdof(i) if (compdof(i)==0) then ! sender hole dest_ioproc(i) = -1 dest_ioindex(i) = -1 else gindex = compdof(i)-1 ! 0-based index call gindex_to_coord(gindex, gstride, ndim, gcoord) ! if(Debug) print *, subName,':: dof ',i,' index=',gindex,' gcoord=',gcoord ! determine if gcoord lies in any io proc's start/count box ' if (.not. find_ioproc(gcoord, lb, ub, lstride, ndim, nioproc, & ioproc, ioindex)) then print *, subName,':: ERROR: no destination found for compdof=', compdof(i) print *, subName,':: INFO: gsize=', gsize print *, subName,':: INFO: nioproc',nioproc,' ioproc ',ioproc,' ioindex ',ioindex do j=1,nioproc print *, subName, ':: INFO io ', j, ' start=', start(:,j), ' count=', count(:,j) end do do j=1,nioproc print *, subName, ':: INFO io ', j, ' lb=', lb(:,j), ' ub=', ub(:,j) end do print *, subName, ':: INFO dof ', i, ' index=', gindex, ' gcoord=', gcoord call piodie( __PIO_FILE__,__LINE__, 'quitting' ) endif dest_ioproc(i) = ioproc dest_ioindex(i) = ioindex endif end do ! i=1,ndof end subroutine compute_dest #ifdef _USE_CREATE_FC !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_create ! ! create a rearranger ! ! this will allocate the following storage in ioDesc: ! dest_ioproc(ndof) ! dest_ioindex(ndof) ! ! this space should be freed in box_rearrange_free ! # 968 "box_rearrange.F90.in" subroutine box_rearrange_create(Iosystem, compdof, gsize, ndim, & nioproc, ioDesc) implicit none type (Iosystem_desc_t), intent(in) :: Iosystem integer, intent(in) :: compdof(:) ! global indices for compbuf integer, intent(in) :: gsize(:) ! global domain size gsize(ndim) integer, intent(in) :: ndim, nioproc type (IO_desc_t), intent(inout) :: ioDesc ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_create' integer :: start(ndim,nioproc) integer :: count(ndim,nioproc) integer :: ndof integer :: ierror integer :: i integer :: niodof !!!!!! ndof = size(compdof) call alloc_check( ioDesc%dest_ioproc, ndof, & 'box_rearrange_create dest_ioproc' ) call alloc_check( ioDesc%dest_ioindex, ndof, & 'box_rearrange_create dest_ioindex') !!!!!! ! Gather iodesc%start,iodesc%count from IO procs to root IO proc ! then broadcast to all procs if(ndim.ne.size(iodesc%start)) then print *,__PIO_FILE__,__LINE__,ndim, size(iodesc%start) call piodie(__PIO_FILE__,__LINE__,'bad ndim size',ndim) end if start = 0 count = 0 if (Iosystem%IOproc) then call pio_fc_gather_int(int(iodesc%start), ndim, MPI_INTEGER, & ! sendbuf, count, type start, ndim, MPI_INTEGER, & ! recvbuf, count, type 0, Iosystem%IO_comm ) call pio_fc_gather_int(int(iodesc%count), ndim, MPI_INTEGER, & ! sendbuf, count, type count, ndim, MPI_INTEGER, & ! recvbuf, count, type 0, Iosystem%IO_comm ) if(Debug) then print *, __PIO_FILE__,__LINE__,iodesc%start, iodesc%count if(iosystem%io_rank==0) & print *,__PIO_FILE__,__LINE__,ndim,(i,' :', & start(:,i), count(:,i),i=1,iosystem%num_iotasks) end if ! note that index in start,count is the io_rank not comp_rank endif call MPI_BCAST(start, ndim*Iosystem%num_iotasks, MPI_INTEGER, & ! buf, cnt Iosystem%ioranks(1), Iosystem%union_comm, ierror ) call CheckMPIReturn(subName, ierror) call MPI_BCAST(count, ndim*Iosystem%num_iotasks, MPI_INTEGER, & ! buf, cnt Iosystem%ioranks(1), Iosystem%union_comm, ierror ) call CheckMPIReturn(subName, ierror) #if DEBUG if (debug .and. Iosystem%comp_rank==0) then do i=1,Iosystem%num_iotasks print *, subName,':: comp_rank=', Iosystem%comp_rank, ': io ', & i, ' start=',start(:,i), ' count=', count(:,i) end do endif #endif !!!!!!! ! compute io dest and indices call compute_dest(compdof, start, count, gsize, ndim, & Iosystem%num_iotasks, ioDesc%dest_ioproc, ioDesc%dest_ioindex ) #ifdef _MPISERIAL ! Version for use with mpi-serial. ! NOTE: cached values in iodesc other than dest_ioproc() and dest_ioindex() ! will NOT be allocated in this build if (Iosystem%num_tasks /= 1 .or. Iosystem%num_iotasks /= 1) then call piodie( __PIO_FILE__,__LINE__, & 'pio was built with -D_MPISERIAL but tasks=', & Iosystem%num_tasks, & 'iotasks=', Iosystem%num_iotasks) endif #else ! else not _MPISERIAL niodof = ioDesc%count(1) do i=2,ndim niodof = niodof*ioDesc%count(i) end do call compute_counts(Iosystem, ioDesc, ndof, niodof) ! not _MPISERIAL #endif end subroutine box_rearrange_create !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! routines broken out of box_rearrange.inc ! #ifndef _MPISERIAL # 1081 "box_rearrange.F90.in" subroutine compute_counts(Iosystem, ioDesc, ndof, niodof) type (Iosystem_desc_t), intent(in) :: Iosystem type (IO_desc_t),intent(inout) :: ioDesc integer, intent(in) :: ndof integer, intent(in) :: niodof ! local vars character(len=*), parameter :: subName=modName//'::compute_counts' integer :: myrank ! local task id integer :: num_tasks ! size of comp communicator integer :: num_iotasks ! size of I/O communicator integer :: i ! loop index integer :: iorank ! i/o task id in i/o communicator + 1 integer :: io_comprank ! i/o task id in comp communicator integer :: nrecvs ! if i/o task, number of comp tasks sending ! to/receiving from this task (cached) integer :: ioindex ! offset for data to be sent to i/o task integer :: pos ! array offset integer :: ierror ! MPI error return integer,pointer :: scount(:) ! scount(num_iotasks) is no. sends to each i/o task (cached) integer,pointer :: sindex(:) ! sindex(ndof) is blocks of src indices integer,pointer :: s2rindex(:)! s2rindex(ndof) is local blocks of dest indices integer,pointer :: spos(:) ! spos(num_iotasks) is start in sindex for each i/o task integer,pointer :: tempcount(:) ! used in calculating sindex and s2rindex integer,pointer :: stype(:) ! MPI type used in i/o sends (cached) ! needed on ioprocs only integer,pointer :: rcount(:) ! rcount(nrecvs) is no. recvs from each sender integer,pointer :: rfrom(:) ! rfrom(nrecvs) is id of each sender (cached) integer,pointer :: rindex(:) ! rindex(niodof) is blocks of dest indices integer,pointer :: rtype(:) ! MPI type used in comp receives (cached) ! swapm alltoall communication variables integer,pointer :: sr_types(:) integer,pointer :: send_counts(:) integer,pointer :: send_displs(:) integer :: rbuf_size integer,pointer :: recv_buf(:) integer,pointer :: recv_counts(:) integer,pointer :: recv_displs(:) ! swapm flow control parameters logical :: pio_hs logical :: pio_isend integer :: pio_maxreq !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Communication initialization pio_hs = DEF_P2P_HANDSHAKE pio_isend = DEF_P2P_ISEND pio_maxreq = DEF_P2P_MAXREQ ! First communication ! comp procs tell io procs how many items they will send ! init myrank = Iosystem%union_rank num_tasks = IOsystem%num_tasks num_iotasks = Iosystem%num_iotasks !need to cache call alloc_check(ioDesc%scount, num_iotasks, 'scount buffer') scount=>ioDesc%scount ! determine number of items going to each io proc scount=0 do i=1,ndof iorank=ioDesc%dest_ioproc(i) if (iorank /= -1) then ! not a sender hole if (iorank<1 .or. iorank>num_iotasks) & call piodie(__PIO_FILE__,__LINE__,'io destination out of range',iorank) scount(iorank) = scount(iorank) + 1 endif end do #if DEBUG print *,myrank,': scount()=',scount #endif ! allocate and initialize swapm specification arguments call alloc_check(sr_types, num_tasks, 'sr_types temp') sr_types = MPI_INTEGER ! send data structures for all processes ! send_buf (num_iotasks) is scount ! sbuf_size = num_iotasks ! send_counts(num_tasks) = 0 for non-io, 1 for i/o ! send_displs(num_tasks) = 0 for non-io, (i-1) for i/o call alloc_check(send_counts, num_tasks, 'send_counts temp') send_counts = 0 call alloc_check(send_displs, num_tasks, 'send_displs temp') send_displs = 0 do i=1,num_iotasks ! go from 1-based io rank to 0-based rank in union_comm io_comprank = find_io_comprank(IOsystem,i) + 1 ! arrays are 1-based send_counts(io_comprank) = 1 send_displs(io_comprank) = i-1 end do ! receive data structures if (Iosystem%IOproc) then ! for i/o processes: ! recv_buf (num_tasks) == scount from each process ! rbuf_size = num_tasks ! recv_counts(num_tasks) == 1 ! recv_displs(num_tasks) == (i-1) rbuf_size = num_tasks call alloc_check(recv_buf, rbuf_size, 'recv_buf temp') recv_buf = 0 call alloc_check(recv_counts, num_tasks, 'recv_counts temp') recv_counts = 1 call alloc_check(recv_displs, num_tasks, 'recv_displs temp') do i=1,num_tasks recv_displs(i) = i-1 end do else ! for non-i/o processes ! recv_buf(1) is ignored ! rbuf_size = 1 ! recv_counts(num_tasks) == 0 ! recv_displs(num_tasks) == 0 rbuf_size = 1 call alloc_check(recv_buf, rbuf_size, 'recv_buf temp') recv_buf = 0 call alloc_check(recv_counts, num_tasks, 'recv_counts temp') recv_counts = 0 call alloc_check(recv_displs, num_tasks, 'recv_displs temp') recv_displs = 0 endif call pio_swapm( num_tasks, myrank, & scount, num_iotasks, send_counts, send_displs, sr_types, & recv_buf, rbuf_size, recv_counts, recv_displs, sr_types, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) ! determine nrecvs, rcount, and rfrom nrecvs = 0 if (Iosystem%IOproc) then do i=1,num_tasks if (recv_buf(i) /= 0) then nrecvs = nrecvs + 1 endif enddo call alloc_check(rcount, nrecvs, 'rcount buffer') rcount = 0 !need to cache call alloc_check(ioDesc%rfrom, nrecvs, 'rfrom') rfrom=>ioDesc%rfrom nrecvs = 0 do i=1,num_tasks if (recv_buf(i) /= 0) then nrecvs = nrecvs + 1 rcount(nrecvs) = recv_buf(i) rfrom(nrecvs) = i-1 endif enddo endif ioDesc%nrecvs = nrecvs call dealloc_check(recv_buf, 'recv_buf temp') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Second communication ! send indices to io procs ! sindex() contains blocks of indices defining ! data going to/coming from the i/o processes call alloc_check(sindex, ndof, 'sindex temp') sindex = 0 ! s2rindex() contains the destination indices ! corresponding to sindex call alloc_check(s2rindex, ndof, 'sindex temp') s2rindex = 0 ! spos(i) is the position in sindex() where the ! block of indices going to the ith ioproc starts call alloc_check(spos, num_iotasks, 'spos temp') spos(1)=1 do i=2,num_iotasks spos(i)=spos(i-1)+scount(i-1) if (scount(i)/=0 .and. spos(i) > ndof) & call piodie(__PIO_FILE__,__LINE__,'spos=',spos(i),'> ndof=',ndof) end do call alloc_check(tempcount, num_iotasks, 'tempcount') tempcount=0 do i=1,ndof iorank = ioDesc%dest_ioproc(i) ioindex = ioDesc%dest_ioindex(i) if (iorank /= -1) then ! skip sender hole sindex(spos(iorank)+tempcount(iorank)) = i-1 s2rindex(spos(iorank)+tempcount(iorank)) = ioindex tempcount(iorank) = tempcount(iorank) + 1 if (tempcount(iorank) > scount(iorank)) & call piodie(__PIO_FILE__,__LINE__,'tempcount>scount') endif end do call dealloc_check(tempcount, 'tempcount') ! send data mapping for all processes ! send_buf (ndof) is s2rindex ! sbuf_size = ndof ! send_counts(num_tasks) = 0 for non-i/o, scount for i/o ! send_displs(num_tasks) = 0 for non-i/o, spos-1 for i/o send_counts = 0 send_displs = 0 do i=1,num_iotasks ! go from 1-based io rank to 0-based rank in union_comm io_comprank = find_io_comprank(IOsystem,i) + 1 ! arrays are 1-based send_counts(io_comprank) = scount(i) send_displs(io_comprank) = spos(i)-1 end do call dealloc_check(spos, 'spos temp') ! receive data structures if (Iosystem%IOproc) then ! for i/o processes: ! recv_buf (niodof) is rindex ! rbuf_size = niodof ! recv_counts(num_tasks) is 0 for non-'rfrom', is rcount for 'rfrom' ! recv_displs(num_tasks) is 0 for non-'rfrom', is sum_i recv_counts for 'rfrom' rbuf_size = niodof call alloc_check(rindex, niodof, 'rindex buffer') rindex = 0 recv_counts = 0 do i=1,nrecvs recv_counts(rfrom(i)+1) = rcount(i) enddo recv_displs = 0 do i=2,nrecvs recv_displs(rfrom(i)+1) = recv_displs(rfrom(i-1)+1) + rcount(i-1) enddo else ! for non-i/o processes ! recv_buf(1) is rindex, which is ignored ! rbuf_size = 1 ! recv_counts(num_tasks) == 0 ! recv_displs(num_tasks) == 0 rbuf_size = 1 call alloc_check(rindex, rbuf_size) rindex = 0 recv_counts = 0 recv_displs = 0 endif call pio_swapm( num_tasks, myrank, & s2rindex, ndof, send_counts, send_displs, sr_types, & rindex, rbuf_size, recv_counts, recv_displs, sr_types, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) call dealloc_check(s2rindex, 's2rindex temp') call dealloc_check(sr_types, 'sr_types temp') call dealloc_check(send_counts, 'send_counts temp') call dealloc_check(send_displs, 'send_displs temp') call dealloc_check(recv_counts, 'recv_counts temp') call dealloc_check(recv_displs, 'recv_displs temp') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! Create the mpi types for io proc receives ! if (Iosystem%IOproc) then !need to cache call alloc_check(ioDesc%rtype, nrecvs, 'mpi recv types') rtype=>ioDesc%rtype pos = 1 do i=1,nrecvs #if DEBUG #if DEBUG_INDICES print *, subName,':: myrank=',myrank,': recv indices from ',rfrom(i), & ' count=',rcount(i),' value=',rindex(pos:pos+rcount(i)-1) #else print *, subName,':: myrank=',myrank,': recv indices from ',rfrom(i), & ' count=',rcount(i) #endif #endif ! need rindex to contain 0-based displacements here call MPI_TYPE_CREATE_INDEXED_BLOCK( & rcount(i), 1, rindex(pos:pos+rcount(i)-1), & ! count,blen, disp ioDesc%baseTYPE, rtype(i), ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(rtype(i), ierror) call CheckMPIReturn(subName,ierror) pos = pos + rcount(i) end do endif ! ! Create the mpi types for the comp proc sends !need to cache call alloc_check(ioDesc%stype, num_iotasks, 'mpi send types') stype=>ioDesc%stype pos = 1 do i=1,num_iotasks if (scount(i) /= 0) then call MPI_TYPE_CREATE_INDEXED_BLOCK( & scount(i), 1, sindex(pos:pos+scount(i)-1), & ! count, blen, disp ioDesc%baseTYPE, stype(i), ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(stype(i), ierror) call CheckMPIReturn(subName,ierror) pos = pos + scount(i) endif end do ! ! clean up ! if (Iosystem%IOproc) then call dealloc_check(rcount, 'rcount temp') call dealloc_check(rindex, 'rindex temp') endif call dealloc_check(sindex, 'sindex temp') end subroutine compute_counts ! ifndef _MPISERIAL #endif #else !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_create ! ! create a rearranger ! ! this will allocate the following storage in ioDesc: ! dest_ioproc(ndof) ! dest_ioindex(ndof) ! ! this space should be freed in box_rearrange_free ! # 1470 "box_rearrange.F90.in" subroutine box_rearrange_create( Iosystem,compdof,gsize,ndim,nioproc,ioDesc) implicit none type (Iosystem_desc_t), intent(in) :: Iosystem integer, intent(in) :: compdof(:) ! global indices for compbuf integer, intent(in) :: gsize(:) ! global domain size gsize(ndim) integer, intent(in) :: ndim, nioproc type (IO_desc_t), intent(inout) :: ioDesc ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_create' integer :: start(ndim,nioproc) integer :: count(ndim,nioproc) integer :: ndof integer :: ierror integer :: i integer :: nsends(nioproc) ! nsends(nioproc) integer :: root integer :: dest integer :: nsends_total, nsends_global integer :: nrecvs_global integer :: nrecvs integer :: niodof integer :: num_iotasks !!!!!! ndof=size(compdof) call alloc_check( ioDesc%dest_ioproc,ndof, & 'box_rearrange_create dest_ioproc') call alloc_check( ioDesc%dest_ioindex,ndof, & 'box_rearrange_create dest_ioindex') !!!!!! ! Gather iodesc%start,iodesc%count from IO procs to root IO proc ! then broadcast to all procs if(ndim.ne.size(iodesc%start)) then print *,__PIO_FILE__,__LINE__,ndim, size(iodesc%start) call piodie(__PIO_FILE__,__LINE__,'bad ndim size',ndim) end if start=0 count=0 if (Iosystem%IOproc) then call MPI_GATHER( int(iodesc%start),ndim,MPI_INTEGER, & ! sendbuf, count, type start,ndim,MPI_INTEGER, & ! recvbuf, count, type 0,Iosystem%IO_comm,ierror ) call CheckMPIReturn(subName,ierror) call MPI_GATHER( int(iodesc%count),ndim,MPI_INTEGER, & ! sendbuf, count, type count,ndim,MPI_INTEGER, & ! recvbuf, count, type 0,Iosystem%IO_comm,ierror ) call CheckMPIReturn(subName,ierror) if(Debug) then print *, __PIO_FILE__,__LINE__,iodesc%start, iodesc%count if(iosystem%io_rank==0) print *,__PIO_FILE__,__LINE__,ndim,(i,' :',start(:,i), count(:,i),i=1,iosystem%num_iotasks) end if ! note that index in start,count is the io_rank not comp_rank endif call MPI_BCAST( start,ndim*Iosystem%num_iotasks,MPI_INTEGER, & ! buf, cnt Iosystem%ioroot, Iosystem%union_comm,ierror ) call CheckMPIReturn(subName,ierror) call MPI_BCAST( count,ndim*Iosystem%num_iotasks,MPI_INTEGER, & ! buf, cnt Iosystem%ioroot, Iosystem%union_comm,ierror ) call CheckMPIReturn(subName,ierror) #if DEBUG if (Iosystem%comp_rank==0) then do i=1,Iosystem%num_iotasks print *, subName,':: comp_rank=',Iosystem%comp_rank,': io ',i,' start=',start(:,i), & ' count=',count(:,i) end do endif #endif !!!!!!! ! compute io dest and indices call compute_dest( compdof,start,count,gsize,ndim,Iosystem%num_iotasks, & ioDesc%dest_ioproc,ioDesc%dest_ioindex) #ifdef _MPISERIAL ! Version for use with mpi-serial. ! NOTE: cached values in iodesc other than dest_ioproc() and dest_ioindex() ! will NOT be allocated in this build if (Iosystem%num_tasks /= 1 .or. Iosystem%num_iotasks /= 1) then call piodie( __PIO_FILE__,__LINE__, & 'pio was built with -D_MPISERIAL but tasks=', & Iosystem%num_tasks, & 'iotasks=', Iosystem%num_iotasks) endif #else ! else not _MPISERIAL !!!!!!! ! loop over iodest and count sends to each ioproc nsends=0 nsends_total=0 do i=1,ndof ! change: only want to know how many distinct senders dest=ioDesc%dest_ioproc(i) if (dest/= -1) then ! ignore sender 'holes' if (nsends(dest)==0) then nsends(dest)=1 nsends_total=nsends_total+1 endif endif end do #if DEBUG print *,Iosystem%comp_rank,': nsends()=',nsends #endif ioDesc%nrecvs=0 ! this will only be overridden on io procs do i=1,Iosystem%num_iotasks root = find_io_comprank(ioSystem,i) call MPI_REDUCE( nsends(i),ioDesc%nrecvs,1,MPI_INTEGER, & MPI_SUM,root,Iosystem%union_comm,ierror ) call CheckMPIReturn(subName,ierror) end do #if DEBUG if (Iosystem%IOproc) & print *,Iosystem%comp_rank,': nrecvs=',ioDesc%nrecvs #endif ! Consistency check - collect totals to io root for a check call MPI_REDUCE( nsends_total,nsends_global,1,MPI_INTEGER, & MPI_SUM,Iosystem%ioranks(1),Iosystem%union_comm,ierror ) call CheckMPIReturn(subName,ierror) if (Iosystem%IOproc) & call MPI_REDUCE( ioDesc%nrecvs,nrecvs_global,1,MPI_INTEGER, & MPI_SUM,0,Iosystem%IO_comm,ierror ) call CheckMPIReturn(subName,ierror) if (Iosystem%union_rank == Iosystem%ioranks(1)) then if (Iosystem%io_rank/=0) then call piodie( __PIO_FILE__,__LINE__,'Iosystem%iomaster rank ',Iosystem%iomaster, & 'has nonzero io_rank ',Iosystem%io_rank) endif if (nsends_global /= nrecvs_global ) & call piodie( __PIO_FILE__,__LINE__,'nsends_global=',nsends_global, & '!= nrecvs_global=',nrecvs_global ) endif ! cache info for rearrange nrecvs=ioDesc%nrecvs num_iotasks=Iosystem%num_iotasks niodof=ioDesc%count(1) do i=2,ndim niodof=niodof*ioDesc%count(i) end do call compute_counts(Iosystem,ioDesc,ndof,niodof) ! not _MPISERIAL #endif end subroutine box_rearrange_create !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! routines broken out of box_rearrange.inc ! #ifndef _MPISERIAL # 1658 "box_rearrange.F90.in" subroutine compute_counts( Iosystem,ioDesc,ndof,niodof ) type (Iosystem_desc_t), intent(in) :: Iosystem type (IO_desc_t),intent(in) :: ioDesc integer :: ndof integer :: niodof ! needed on ioprocs only integer,pointer :: rcount(:) ! rcount(nrecvs)= # recvs from ith sender integer,pointer :: rindex(:) ! rindex(niodof)= blocks of dest indices integer,pointer :: rfrom(:) ! rfrom(nrecvs)= rank of ith sender integer,pointer :: rreq(:) ! rreq(nrecvs)= receive requests on ioprocs integer,pointer :: rtype(:) ! needed on all procs integer,pointer :: scount(:) ! scount(num_iotasks)= # sends to ith ioproc integer,pointer :: srcindex(:)! srcindex(ndof)= blocks of src indices integer,pointer :: stype(:) #ifdef NO_MPI2 integer, pointer :: blens(:) #endif ! local vars character(len=*), parameter :: subName=modName//'::compute_counts' integer :: nrecvs integer :: num_iotasks integer :: myrank integer :: ierror integer :: sreq integer :: max_scount integer :: iorank integer :: ioindex integer :: io_comprank integer :: i, j integer :: status(MPI_STATUS_SIZE) integer :: pos ! position in array integer :: count integer :: sendtype, recvtype integer,pointer :: spos(:) ! spos(i) = start in sindex for ith ioproc integer,pointer :: tempcount(:) integer :: basetype_mpi ! init num_iotasks=Iosystem%num_iotasks nrecvs=ioDesc%nrecvs myrank=Iosystem%comp_rank ! alloc if (Iosystem%IOproc) then call alloc_check(rcount,nrecvs,'rcount buffer') call alloc_check(rindex,niodof,'rindex buffer') call alloc_check(rreq,nrecvs,'rreq') !need to cache call alloc_check(ioDesc%rfrom,nrecvs,'rfrom') rfrom=>ioDesc%rfrom call alloc_check(ioDesc%rtype,nrecvs,'mpi recv types') rtype=>ioDesc%rtype endif !need to cache call alloc_check(ioDesc%scount,num_iotasks,'scount buffer') scount=>ioDesc%scount call alloc_check(ioDesc%stype,num_iotasks,'mpi send types') stype=>ioDesc%stype !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! First communication ! comp procs tell io procs how many items they will send #if DEBUG_BARRIER call MPI_BARRIER(Iosystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) if (myrank==0) print *,'BARRIER - start of phase 1' #endif !!!!! ! set up receives on io procs if (Iosystem%IOproc) then ! data will be the number of indices to receive later ! order in rcount is random depending on incoming messages do i=1,nrecvs call MPI_IRECV( rcount(i),1,MPI_INTEGER, & ! buf,count,type MPI_ANY_SOURCE,TAG0, & ! source, tag Iosystem%union_comm,rreq(i),ierror) call CheckMPIReturn(subName,ierror) end do endif !!!!! ! send number of items going to each io proc scount=0 max_scount=0 do i=1,ndof iorank=ioDesc%dest_ioproc(i) if (iorank /= -1) then ! not a sender hole if (iorank<1 .or. iorank>num_iotasks) & call piodie(__PIO_FILE__,__LINE__,'io destination out of range',iorank) scount(iorank) = scount(iorank) + 1 if (max_scount ndof) & call piodie(__PIO_FILE__,__LINE__,'spos=',spos(i),'> ndof=',ndof) end do ! srcindex() contains blocks of indices in the src array ! the ith block in src/dstindex goes to the ith ioproc tempcount=0 do i=1,ndof iorank = ioDesc%dest_ioproc(i) ioindex = ioDesc%dest_ioindex(i) if (iorank /= -1) then ! skip sender hole srcindex(spos(iorank)+tempcount(iorank)) = i-1 ! 0-based tempcount(iorank) = tempcount(iorank) + 1 if (tempcount(iorank) > scount(iorank)) & call piodie(__PIO_FILE__,__LINE__,'tempcount>scount') endif end do call dealloc_check(tempcount,'tempcount') ! construct a type to send a sparse subset of dest_ioindex() pos=1 do i=1,num_iotasks if (scount(i) /= 0) then call MPI_TYPE_CREATE_INDEXED_BLOCK( & scount(i), 1, srcindex(pos:pos+scount(i)-1), & ! count, blen, disp MPI_INTEGER, sendtype, ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(sendtype, ierror) call CheckMPIReturn(subName,ierror) ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(Iosystem,i) #if DEBUG print *,myrank,': send posted dest=',io_comprank,' count=',scount(i) #endif call MPI_ISEND( ioDesc%dest_ioindex, 1, sendtype, & ! buf, count, type io_comprank,TAG1, & ! destination,tag Iosystem%union_comm,sreq,ierror ) call CheckMPIReturn(subName,ierror) call MPI_TYPE_FREE(sendtype, ierror) call CheckMPIReturn(subName,ierror) call MPI_REQUEST_FREE(sreq,ierror) call CheckMPIReturn(subName,ierror) pos=pos+scount(i) endif end do call dealloc_check(spos,'spos') ! _USE_SPACE #else ! This alternative trades uses much less temp space ! but makes one pass through the compdof for each ioproc ! so that it can pull out the indices on-the-fly call alloc_check(srcindex,max_scount,'sindex(max_scount)') do i=1,num_iotasks ! send indices to ioproc i if (scount(i) /= 0) then count=0 loop1: do j=1,ndof ! search for indices to ioproc i iorank=ioDesc%dest_ioproc(j) ! sender hole is -1 if (iorank==i) then count=count+1 srcindex(count)=j-1 ! 0-based if (count==scount(i)) exit loop1 endif end do loop1 if (count/=scount(i)) call piodie(__PIO_FILE__,__LINE__,'count/=scount(i)') if (count>max_scount) call piodie(__PIO_FILE__,__LINE__,'count>max_scount') call MPI_TYPE_CREATE_INDEXED_BLOCK( & scount(i), 1, srcindex, & ! count, blen, disp MPI_INTEGER, sendtype, ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(sendtype, ierror) call CheckMPIReturn(subName,ierror) ! go from 1-based io rank to 0-based comprank io_comprank=find_io_comprank(Iosystem,i) #if DEBUG print *,myrank,': send posted dest=',io_comprank,' count=',scount(i) #endif call MPI_ISEND( ioDesc%dest_ioindex, 1, sendtype, & ! buf, count, type io_comprank,TAG1, & ! destination,tag Iosystem%union_comm,sreq,ierror ) call CheckMPIReturn(subName,ierror) call MPI_TYPE_FREE(sendtype, ierror) call CheckMPIReturn(subName,ierror) call MPI_REQUEST_FREE(sreq,ierror) call CheckMPIReturn(subName,ierror) endif end do #endif #if DEBUG_BARRIER call MPI_BARRIER(Iosystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) if (myrank==0) print *,'BARRIER - end of phase 2' #endif ! ! Moved the iproc wait rreq() to here from top of third communication ! ! ioprocs complete receive of indices if (Iosystem%IOproc) then do i=1,nrecvs call MPI_WAIT( rreq(i), status, ierror ) call CheckMPIReturn(subName,ierror) end do endif !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! Create the mpi types for io proc receives ! if (Iosystem%IOproc) then pos=1 do i=1,nrecvs #if DEBUG #if DEBUG_INDICES print *, subName,':: myrank=',myrank,': recv indices from ',status(MPI_SOURCE), & ' count=',rcount(i),' value=',rindex(pos:pos+rcount(i)-1) #else print *, subName,':: myrank=',myrank,': recv indices from ',status(MPI_SOURCE), & ' count=',rcount(i) #endif #endif ! need rindex to contain 0-based displacements here call MPI_TYPE_CREATE_INDEXED_BLOCK( & rcount(i), 1, rindex(pos:pos+rcount(i)-1), & ! count,blen, disp ioDesc%baseTYPE, rtype(i), ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(rtype(i), ierror) call CheckMPIReturn(subName,ierror) pos=pos+rcount(i) end do endif ! ! Create the mpi types for the comp proc sends #ifdef _USE_SPACE pos=1 do i=1,num_iotasks if (scount(i) /= 0) then call MPI_TYPE_CREATE_INDEXED_BLOCK( & scount(i), 1, srcindex(pos:pos+scount(i)-1), & ! count, blen, disp ioDesc%baseTYPE, stype(i), ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(stype(i), ierror) call CheckMPIReturn(subName,ierror) pos=pos+scount(i) endif end do ! if _USE_SPACE #else ! This alternative uses more time but less space do i=1,num_iotasks ! send indices to ioproc i if (scount(i) /= 0) then count=0 loop2: do j=1,ndof ! search for indices to ioproc i iorank=ioDesc%dest_ioproc(j) ! sender hole is -1 if (iorank==i) then count=count+1 srcindex(count)=j-1 ! 0-based if (count==scount(i)) exit loop2 endif end do loop2 if (count/=scount(i)) call piodie(__PIO_FILE__,__LINE__,'count/=scount(i)') if (count>max_scount) call piodie(__PIO_FILE__,__LINE__,'count>max_scount') call MPI_TYPE_CREATE_INDEXED_BLOCK( & scount(i), 1, srcindex, & ! count, blen, disp ioDesc%baseTYPE, stype(i), ierror ) ! oldtype, newtype call CheckMPIReturn(subName,ierror) call MPI_TYPE_COMMIT(stype(i), ierror) call CheckMPIReturn(subName,ierror) endif end do ! if _USE_SPACE #endif ! ! clean up ! if (Iosystem%IOproc) then call dealloc_check(rcount,'rcount temp') call dealloc_check(rindex,'rindex temp') call dealloc_check(rreq,'receive requests') endif call dealloc_check(srcindex,'srcindex temp') end subroutine compute_counts ! ifndef _MPISERIAL #endif ! ifndef _USE_CREATE_FC #endif !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! box_rearrange_free ! ! free the storage in the ioDesc that was allocated for ! the rearrangement ! # 2145 "box_rearrange.F90.in" subroutine box_rearrange_free(Iosystem,ioDesc) implicit none type (Iosystem_desc_t), intent(in) ::Iosystem type (IO_desc_t),intent(inout) :: ioDesc ! local vars character(len=*), parameter :: subName=modName//'::box_rearrange_free' integer :: i integer :: ierror if(associated(iodesc%dest_ioproc)) then call dealloc_check(ioDesc%dest_ioproc,'ioDesc%dest_ioproc') nullify(iodesc%dest_ioproc) end if if(associated(iodesc%dest_ioindex)) then call dealloc_check(ioDesc%dest_ioindex,'ioDesc%dest_ioindex') nullify(iodesc%dest_ioindex) end if #ifdef _MPISERIAL ! Other vars not allocated in _MPISERIAL build #else !else not _MPISERIAL if (Iosystem%IOproc) then if(associated(iodesc%rfrom)) then call dealloc_check(ioDesc%rfrom) nullify(iodesc%rfrom) end if do i=1,ioDesc%nrecvs call MPI_TYPE_FREE(ioDesc%rtype(i), ierror) call CheckMPIReturn(subName,ierror) end do if(associated(iodesc%rtype)) then call dealloc_check(ioDesc%rtype,'iodesc%rtype') nullify(iodesc%rtype) end if endif do i=1,Iosystem%num_iotasks if (ioDesc%scount(i) /= 0) then call MPI_TYPE_FREE(ioDesc%stype(i), ierror) call CheckMPIReturn(subName,ierror) endif end do if(associated(iodesc%scount)) then call dealloc_check(ioDesc%scount) nullify(iodesc%scount) end if if(associated(iodesc%stype)) then call dealloc_check(ioDesc%stype,'iodesc%stype') nullify(iodesc%stype) end if ! not _MPISERIAL #endif end subroutine box_rearrange_free ! _USEBOX #endif end module box_rearrange