module general_mpi_module
! to tell the compiler that unused dummy parameters are ok,
! use for example in subroutine sub(x,i) (x is array):
!   FAKE(kind(x)+i)
! use kind() for everything that can't be added to a complex*16
#define FAKE(x) if(.false.) general_mpi_dummy=x
! to tell the compiler that intent(out) arguments are set, use
! for example FAKESET(x,0)
#define FAKESET(x,y) if(.false.) x=y
#ifdef USEMPI
   use mpi
#endif
   implicit none
   private
   save
   public matrix_coll, matrix_distr, shift_borders, det_submatrices, vector_distr, decomp
   public general_mpi_dummy
   complex*16 :: general_mpi_dummy
!#define COLLECT_TIMER
!#define DISTRIBUTE_TIMER
   !
   ! NOTE: the matrices are considered to be bordered by columns
   ! left and right and by rows up and down. So, in general,
   ! only b(2:mb-1,2:nb-1) is sent to the master process.
   ! HOWEVER: if matrix b is
   !   on the lowy  side: b(:,1)  is also sent
   !   on the highy side: b(:,nb) is also sent
   !   on the lowx  side: b(1,:)  is also sent
   !   on the highx side: b(mb,:) is also sent
   ! The master process receives a(1:ma,1:na).
   ! dimension of b: (1:mb,1:nb)
   ! dimension of a: (1:ma,1:na)
   !
   interface matrix_distr
      module procedure matrix_distr_real8
      module procedure matrix_distr_integer
   end interface matrix_distr

   interface matrix_coll
      module procedure matrix_coll_real8
      module procedure matrix_coll_integer
   end interface matrix_coll

   interface shift_borders
      module procedure shift_borders_matrix_real8
   end interface shift_borders

   interface vector_distr
      module procedure vector_distr_real8
      module procedure block_vector_distr_real8
   end interface vector_distr

contains
!_____________________________________________________________________________

subroutine vector_distr_real8(a,b,is,lm,root,comm)
   !
   ! Distribute vector a on processes in communicator comm
   ! Process P receives a(is(P+1):is(P+1)+lm(P+1)-1)   (P: 0..size_of_comm - 1)
   !
   ! Parameters:
   ! real*8  a (in):  vector to distribute, only used at root
   ! real*8  b (out): local vector
   ! integer is(number-of-processes) (in): see above: needed on root
   ! integer lm(number-of-processes) (in): see above: needed on all processes
   ! integer root (in): process that holds a, is
   ! integer comm (in): MPI communicator
   !
   ! complication: a and b are not necessarily contiguous,
   !  but when mpi_alltoallw is called with a and b, the compiler
   !  will generate contiguous arrays.
   !
   ! root process must have rank .eq. 0
   !
#ifdef USEMPI
   use xmpi_module, only : xmpi_ocomm
#endif
   implicit none
   real*8,  dimension(:), intent(in)  :: a
   real*8,  dimension(:), intent(out) :: b
   integer, dimension(:), intent(in)  :: is
   integer, dimension(:), intent(in)  :: lm
   integer, intent(in)                :: root
   integer, intent(in)                :: comm
#ifdef USEMPI
   integer                            :: ier,ra,sz,basic_type,i
   integer, allocatable, dimension(:) :: recvtypes,sendtypes,recvcounts,sendcounts,sdispls,rdispls
   integer, dimension(1)              :: sizes,subsizes,starts

   basic_type = MPI_DOUBLE_PRECISION

   call MPI_Comm_size(comm, sz, ier)
   call MPI_Comm_rank(comm, ra, ier)

   if (root /= 0) then
      print *,'Error in vector_distr_real8: root must be 0, but is:',root
      call MPI_Abort(xmpi_ocomm,1,ier)
   endif

   allocate(recvtypes(sz))
   allocate(sendtypes(sz))
   allocate(recvcounts(sz))
   allocate(sendcounts(sz))
   allocate(sdispls(sz))
   allocate(rdispls(sz))

   sdispls    = 0
   rdispls    = 0
   recvtypes  = MPI_CHARACTER
   sendtypes  = MPI_CHARACTER
   recvcounts = 0
   sendcounts = 0
   !
   ! Create MPI types
   !
   ! MPI_TYPE_CREATE_SUBARRAY(NDIMS, ARRAY_OF_SIZES, ARRAY_OF_SUBSIZES,
   !                          ARRAY_OF_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR)
   !    INTEGER NDIMS, ARRAY_OF_SIZES(*), ARRAY_OF_SUBSIZES(*),
   !            ARRAY_OF_STARTS(*), ORDER, OLDTYPE, NEWTYPE, IERROR
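   !
   ! Sketch of the types built below (descriptive only): the receive type on
   ! every rank is a 1-D subarray covering b(1:lm(ra+1)); on root, the send
   ! type for rank i-1 covers a(is(i):is(i)+lm(i)-1). For example, with two
   ! ranks, is = (/1,6/) and lm = (/5,5/), rank 0 gets a(1:5) and rank 1
   ! gets a(6:10), each landing in b(1:5) of that rank.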
   ! determine mpi types for the receive vectors:
   ! all processes will receive only from root
   sizes    = shape(b)
   subsizes = lm(ra+1)
   starts   = 0
   call MPI_Type_create_subarray(1,sizes,subsizes,starts, &
        MPI_ORDER_FORTRAN, basic_type, recvtypes(1), ier)
   call MPI_Type_commit(recvtypes(1),ier)
   recvcounts(1) = 1

   ! determine mpi types for the senders
   if (ra == root) then
      ! root sends to everyone
      do i=1,sz
         sizes      = shape(a)
         subsizes   = lm(i)
         starts     = is(i) - 1
         sendcounts = 1
         call MPI_Type_create_subarray(1,sizes,subsizes,starts, &
              MPI_ORDER_FORTRAN, basic_type, sendtypes(i), ier)
         call MPI_Type_commit(sendtypes(i),ier)
      enddo
   endif

   call MPI_Alltoallw(a,sendcounts,sdispls,sendtypes, &
        b,recvcounts,rdispls,recvtypes,comm,ier)

   do i=1,sz
      if (sendtypes(i) /= MPI_CHARACTER) then
         call MPI_Type_free(sendtypes(i),ier)
      endif
      if (recvtypes(i) /= MPI_CHARACTER) then
         call MPI_Type_free(recvtypes(i),ier)
      endif
   enddo
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+root+comm)
   FAKESET(b,0)
#endif
end subroutine vector_distr_real8
!_____________________________________________________________________________

subroutine block_vector_distr_real8(a,b,is,lm,root,comm)
   !
   ! Distribute the rows of matrix a over the processes in communicator comm
   ! Process P receives a(is(P+1):is(P+1)+lm(P+1)-1,:)   (P: 0..size_of_comm - 1)
   !
   ! Parameters:
   ! real*8  a(:,:) (in):  matrix to distribute, only used at root
   ! real*8  b(:,:) (out): local matrix
   ! integer is(number-of-processes) (in): see above: needed on root
   ! integer lm(number-of-processes) (in): see above: needed on all processes
   ! integer root (in): process that holds a, is
   ! integer comm (in): MPI communicator
   !
   ! complication: a and b are not necessarily contiguous,
   !  but when mpi_alltoallw is called with a and b, the compiler
   !  will generate contiguous arrays.
   !
   ! root process must have rank .eq. 0
   !
#ifdef USEMPI
   use xmpi_module, only : xmpi_ocomm
#endif
   implicit none
   real*8,  dimension(:,:), intent(in)  :: a
   real*8,  dimension(:,:), intent(out) :: b
   integer, dimension(:),   intent(in)  :: is
   integer, dimension(:),   intent(in)  :: lm
   integer, intent(in)                  :: root
   integer, intent(in)                  :: comm
#ifdef USEMPI
   integer                            :: ier,ra,sz,basic_type,i
   integer, allocatable, dimension(:) :: recvtypes,sendtypes,recvcounts,sendcounts,sdispls,rdispls
   integer, dimension(2)              :: sizes,subsizes,starts

   basic_type = MPI_DOUBLE_PRECISION

   call MPI_Comm_size(comm, sz, ier)
   call MPI_Comm_rank(comm, ra, ier)

   if (root /= 0) then
      print *,'Error in block_vector_distr_real8: root must be 0, but is:',root
      call MPI_Abort(xmpi_ocomm,1,ier)
   endif

   allocate(recvtypes(sz))
   allocate(sendtypes(sz))
   allocate(recvcounts(sz))
   allocate(sendcounts(sz))
   allocate(sdispls(sz))
   allocate(rdispls(sz))

   sdispls    = 0
   rdispls    = 0
   recvtypes  = MPI_CHARACTER
   sendtypes  = MPI_CHARACTER
   recvcounts = 0
   sendcounts = 0
   !
   ! Create MPI types
   !
   ! MPI_TYPE_CREATE_SUBARRAY(NDIMS, ARRAY_OF_SIZES, ARRAY_OF_SUBSIZES,
   !                          ARRAY_OF_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR)
   !    INTEGER NDIMS, ARRAY_OF_SIZES(*), ARRAY_OF_SUBSIZES(*),
   !            ARRAY_OF_STARTS(*), ORDER, OLDTYPE, NEWTYPE, IERROR
   !
   ! determine mpi types for the receive matrices:
   ! all processes will receive only from root
   sizes    = shape(b)
   subsizes = (/ lm(ra+1), size(b,2) /)
   starts   = 0
   call MPI_Type_create_subarray(2,sizes,subsizes,starts, &
        MPI_ORDER_FORTRAN, basic_type, recvtypes(1), ier)
   call MPI_Type_commit(recvtypes(1),ier)
   recvcounts(1) = 1

   ! determine mpi types for the senders
   if (ra == root) then
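      !
      ! Descriptive note: the send type for process i describes the row block
      ! a(is(i):is(i)+lm(i)-1, :), i.e. lm(i) full-width rows starting at row
      ! is(i) of the global matrix (starts is zero-based for MPI).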
      ! root sends to everyone
      do i=1,sz
         sizes      = shape(a)
         subsizes   = (/ lm(i), size(a,2) /)
         starts     = (/ is(i) - 1, 0 /)
         sendcounts = 1
         call MPI_Type_create_subarray(2,sizes,subsizes,starts, &
              MPI_ORDER_FORTRAN, basic_type, sendtypes(i), ier)
         call MPI_Type_commit(sendtypes(i),ier)
      enddo
   endif

   call MPI_Alltoallw(a,sendcounts,sdispls,sendtypes, &
        b,recvcounts,rdispls,recvtypes,comm,ier)

   do i=1,sz
      if (sendtypes(i) /= MPI_CHARACTER) then
         call MPI_Type_free(sendtypes(i),ier)
      endif
      if (recvtypes(i) /= MPI_CHARACTER) then
         call MPI_Type_free(recvtypes(i),ier)
      endif
   enddo
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+root+comm)
   FAKESET(b,0)
#endif
end subroutine block_vector_distr_real8
!___________________________________________________________________________

subroutine matrix_distr_real8(a,b,is,lm,js,ln, &
                              root,comm)
   !
   ! This subroutine distributes matrix a, available on root, to the
   ! processes in communicator comm.
   ! Root MUST be process #0 in comm
   !
   ! a:     matrix to be distributed
   ! b:     receive matrix, process p:
   !        (1:lm(p+1),1:ln(p+1)) will be filled (p=0..size_of_comm - 1)
   !        Normally: the whole of b
   ! is,js: on process p (p=1..size of comm), b(1,1) coincides with
   !        a(is(p),js(p))
   ! root:  root process, must be zero
   ! comm:  communicator
   !
   implicit none
   real*8,  intent(in),  dimension(:,:) :: a
   real*8,  intent(out), dimension(:,:) :: b
   integer, intent(in),  dimension(:)   :: is, lm
   integer, intent(in),  dimension(:)   :: js, ln
   integer, intent(in)                  :: root, comm
#ifdef USEMPI
   call doit_distr(is,lm,js,ln,root,MPI_DOUBLE_PRECISION,comm)
contains
   ! subroutine doit_distr is included here:
#include "genmpi_distr.inc"
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+kind(js)+kind(ln)+root+comm)
   FAKESET(b,0)
#endif
end subroutine matrix_distr_real8
!___________________________________________________________________________

subroutine matrix_distr_integer(a,b,is,lm,js,ln, &
                                root,comm)
   !
   ! This subroutine distributes matrix a, available on root, to the
   ! processes in communicator comm.
   ! Root MUST be process #0 in comm
   !
   ! a:     matrix to be distributed
   ! b:     receive matrix, process p:
   !        (1:lm(p+1),1:ln(p+1)) will be filled (p=0..size_of_comm - 1)
   !        Normally: the whole of b
   ! is,js: on process p (p=1..size of comm), b(1,1) coincides with
   !        a(is(p),js(p))
   ! root:  root process, must be zero
   ! comm:  communicator
   !
   implicit none
   integer, intent(in),  dimension(:,:) :: a
   integer, intent(out), dimension(:,:) :: b
   integer, intent(in),  dimension(:)   :: is, lm
   integer, intent(in),  dimension(:)   :: js, ln
   integer, intent(in)                  :: root, comm
#ifdef USEMPI
   call doit_distr(is,lm,js,ln,root,MPI_INTEGER,comm)
contains
   ! subroutine doit_distr is included here:
#include "genmpi_distr.inc"
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+kind(js)+kind(ln)+root+comm)
   FAKESET(b,0)
#endif
end subroutine matrix_distr_integer
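!___________________________________________________________________________
!
! Illustrative sketch (not part of the library API): a typical way to combine
! det_submatrices, matrix_distr and matrix_coll on an mp x np process grid.
! The names aglobal, alocal, ma, na, mp, np and comm are hypothetical.
!
!   integer              :: rank, ier
!   integer, allocatable :: is(:), lm(:), js(:), ln(:)
!   logical, allocatable :: islowy(:), ishighy(:), islowx(:), ishighx(:)
!   real*8,  allocatable :: aglobal(:,:), alocal(:,:)
!
!   call MPI_Comm_rank(comm, rank, ier)
!   allocate(is(mp*np), lm(mp*np), js(mp*np), ln(mp*np))
!   allocate(islowy(mp*np), ishighy(mp*np), islowx(mp*np), ishighx(mp*np))
!   call det_submatrices(ma, na, mp, np, is, lm, js, ln, &
!                        islowy, ishighy, islowx, ishighx)
!   allocate(alocal(lm(rank+1), ln(rank+1)))
!   call matrix_distr(aglobal, alocal, is, lm, js, ln, 0, comm)
!   ! ... work on alocal, exchanging borders with shift_borders ...
!   call matrix_coll(aglobal, alocal, is, lm, js, ln, &
!                    islowy, ishighy, islowx, ishighx, 0, comm)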
!___________________________________________________________________________

subroutine matrix_coll_real8(a,b,is,lm,js,ln, &
                             islowy,ishighy,islowx,ishighx,root,comm,w_only)
   !
   ! collect matrix on root process
   ! NOTE: the collect is done from the non-root processes only, but see
   !       w_only below
   !
   ! parameters:
   ! real*8 a(:,:) (out): matrix to be collected.
   !   This matrix has to be available, it is not allocated by this
   !   subroutine.
   ! real*8 b(:,:) (in): local scattered matrix
   ! integer is,lm,js,ln(number of non-root processes) (in): description of
   !   the location of the submatrices b in matrix a:
   !   Element b(1,1) coincides with a(is(rank+1),js(rank+1)), where rank is
   !   the rank of the process (0..procs-1)
   !   The length of the 1st dimension of b is given by lm(rank+1)
   !   The length of the 2nd dimension of b is given by ln(rank+1)
   ! logical islowy, ishighy, islowx, ishighx (in): is this submatrix at the
   !   lowy, highy, lowx or highx side of the general matrix.
   ! integer root (in): the rank of the process where a is available
   ! integer comm (in): the MPI communicator to be used.
   ! logical, optional, w_only (in): if present and .true., the collection
   !   is done on the root process (must be 0) of the worker processes,
   !   so, in this case, the root process contains data to be collected
   !
   ! Note: a,is,js are used at the root process only
   !
   ! Note: the root process must have rank .eq. 0
   !
   implicit none
   real*8,  intent(out), dimension(:,:) :: a
   real*8,  intent(in),  dimension(:,:) :: b
   integer, intent(in),  dimension(:)   :: is,lm
   integer, intent(in),  dimension(:)   :: js,ln
   logical, intent(in),  dimension(:)   :: islowy, ishighy, islowx, ishighx
   integer, intent(in)                  :: root,comm
   logical, intent(in), optional        :: w_only
#ifdef USEMPI
   call doit_coll(is,lm,js,ln,islowy,ishighy,islowx,ishighx,root,MPI_DOUBLE_PRECISION,comm,w_only)
contains
   ! subroutine doit_coll is included here:
#include "genmpi_coll.inc"
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+kind(js)+kind(ln)+kind(w_only))
   FAKE(kind(islowy)+kind(ishighy)+kind(islowx)+kind(ishighx)+root+comm)
   FAKESET(a,0)
#endif
end subroutine matrix_coll_real8
!___________________________________________________________________________

subroutine matrix_coll_integer(a,b,is,lm,js,ln, &
                               islowy,ishighy,islowx,ishighx,root,comm,w_only)
   !
   ! for description, see matrix_coll_real8
   !
   implicit none
   integer, intent(out), dimension(:,:) :: a
   integer, intent(in),  dimension(:,:) :: b
   integer, intent(in),  dimension(:)   :: is, lm
   integer, intent(in),  dimension(:)   :: js, ln
   logical, intent(in),  dimension(:)   :: islowy, ishighy, islowx, ishighx
   integer, intent(in)                  :: root, comm
   logical, intent(in), optional        :: w_only
#ifdef USEMPI
   call doit_coll(is,lm,js,ln,islowy,ishighy,islowx,ishighx,root,MPI_INTEGER,comm,w_only)
contains
   ! subroutine doit_coll is included here:
#include "genmpi_coll.inc"
#else
   FAKE(kind(a)+kind(b)+kind(is)+kind(lm)+kind(js)+kind(ln)+kind(w_only))
   FAKE(kind(islowy)+kind(ishighy)+kind(islowx)+kind(ishighx)+root+comm)
   FAKESET(a,0)
#endif
end subroutine matrix_coll_integer
!___________________________________________________________________________

subroutine decomp(n, numprocs, myid, s, e)
   ! From the mpich distribution: MPE_Decomp1d
   !
   ! n        : integer, input (for example: number of tasks or
   !            dimension of an array)
   ! numprocs : integer, input (number of processes)
   ! myid     : integer, input (MPI id of this process)
   ! s        : integer, output
   ! e        : integer, output
   !
   ! The purpose of this subroutine is to get an as equal as possible
   ! division of the integers 1..n among numprocs intervals.
   ! A common use is to divide work among MPI processes.
   !
   ! example:
   !
   !   integer numprocs, ier, myid, n, s, e, i
   !   call MPI_Comm_size(MPI_COMM_WORLD, numprocs, ier)
   !   call MPI_Comm_rank(MPI_COMM_WORLD, myid, ier)
   !   n = 1000
   !   call decomp(n, numprocs, myid, s, e)
   !   do i = s,e
   !      ...
   !   enddo
   !
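   ! A worked illustration of the division (not from the original source):
   ! n = 10, numprocs = 3 gives deficit = 1, so myid 0 gets s,e = 1,4
   ! (4 numbers) and myid 1 and 2 get 5,7 and 8,10 (3 numbers each).
   !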
   integer, intent(in)  :: n, numprocs, myid
   integer, intent(out) :: s, e

   integer :: nlocal
   integer :: deficit

   nlocal  = n / numprocs
   s       = myid * nlocal + 1
   deficit = mod(n,numprocs)
   s       = s + min(myid,deficit)
   if (myid .lt. deficit) then
      nlocal = nlocal + 1
   endif
   e = s + nlocal - 1
   if (e .gt. n .or. myid .eq. numprocs-1) then
      e = n
   endif
end subroutine decomp

subroutine det_submatrices(ma,na,mp,np,is,lm,js,ln,islowy,ishighy,islowx,ishighx)
   !
   ! determine a division of a ma*na matrix on mp*np processes
   !
   ! ma: integer(in): 1st dimension of matrix to divide
   ! na: integer(in): 2nd dimension of this matrix
   ! mp: integer(in): 1st dimension of processor grid
   ! np: integer(in): 2nd dimension of processor grid
   ! is(mp*np): integer(out): is(i) will be equal to the
   !            start row of the i-th submatrix
   ! lm(mp*np): integer(out): lm(i) will be equal to the
   !            first dimension of the i-th submatrix
   ! js(mp*np): integer(out): js(i) will be equal to the
   !            start column of the i-th submatrix
   ! ln(mp*np): integer(out): ln(i) will be equal to the
   !            2nd dimension of the i-th submatrix
   ! islowy(mp*np):  logical(out): islowy(i) is true if the i-th submatrix
   !                 shares the first column with the global matrix: a(:,1)
   ! ishighy(mp*np): logical(out): ishighy(i) is true if the i-th submatrix
   !                 shares the last column with the global matrix: a(:,na)
   ! islowx(mp*np):  logical(out): islowx(i) is true if the i-th submatrix
   !                 shares the first row with the global matrix: a(1,:)
   ! ishighx(mp*np): logical(out): ishighx(i) is true if the i-th submatrix
   !                 shares the last row with the global matrix: a(ma,:)
   !
   ! The submatrices overlap:
   !
   !   1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
   !   1 2 3 4 5 6 7 8 9           1 2 3 4 5 6 7 8 9 0 1
   !             1 2 3 4 5 6 7 8 9 0 1 2 3
   !
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !   + + + + + x x x x + + + + + x x x x + + + + + + +
   !
   ! This picture is for the overlap in the horizontal direction;
   ! the overlap is 4.
   !   + -> matrix element
   !   x -> overlapping matrix element
   ! The orders of the submatrices are 9, 13 and 11.
   ! The order of the global matrix is 25.
   !
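   ! Worked illustration of the actual division computed below (the picture
   ! above is only schematic): for ma = 25, mp = 3, np = 1 and nover = 4,
   ! decomp(21,3,..) yields the row ranges 1..7, 8..14 and 15..21, so
   ! is = (/1,8,15/) and lm = (/11,11,11/): the submatrices cover rows
   ! 1:11, 8:18 and 15:25, each overlapping its neighbour by 4 rows.
   !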
   implicit none
   integer, intent(in)                :: ma,na,mp,np
   integer, dimension(:), intent(out) :: is,lm,js,ln
   logical, dimension(:), intent(out) :: islowy,ishighy,islowx,ishighx
#ifdef USEMPI
   integer            :: i,j,s,e,k
   integer, parameter :: nover = 4
#endif

   islowy  = .false.
   ishighy = .false.
   islowx  = .false.
   ishighx = .false.
   is      = 0
   lm      = 0
   js      = 0
   ln      = 0
#ifdef USEMPI
   do i = 1, mp
      call decomp(ma-nover, mp, i - 1, s, e)
      k = 0
      do j = 1, np
         if (j .eq. 1) then
            islowy(i+k) = .true.
         endif
         if (j .eq. np) then
            ishighy(i+k) = .true.
         endif
         is(i + k) = s
         lm(i + k) = e - s + nover + 1
         k = k + mp
      enddo
   enddo

   k = 0
   do j = 1, np
      call decomp(na-nover, np, j - 1, s, e)
      do i = 1, mp
         if (i .eq. 1) then
            islowx(i+k) = .true.
         endif
         if (i .eq. mp) then
            ishighx(i+k) = .true.
         endif
         js(i + k) = s
         ln(i + k) = e - s + nover + 1
      enddo
      k = k + mp
   enddo
#else
   FAKE(ma+na+mp+np)
#endif
end subroutine det_submatrices

subroutine shift_borders_matrix_real8(a,lowy,highy,lowx,highx,comm)
   !
   ! shift borders to and from neighbours.
   !
   implicit none
   real*8, dimension(:,:), intent(inout) :: a
   integer, intent(in)                   :: lowy,highy,lowx,highx,comm
#ifdef USEMPI
   integer, parameter :: datatype = MPI_DOUBLE_PRECISION
   integer            :: ierror,ma,na

   ma = ubound(a,1)
   na = ubound(a,2)

   ! send to lowy, receive from highy
   call MPI_Sendrecv(a(2:ma-1,2),    ma-2, datatype, lowy,  1, &
                     a(2:ma-1,na),   ma-2, datatype, highy, 1, &
                     comm, MPI_STATUS_IGNORE, ierror)
   ! send to highy, receive from lowy
   call MPI_Sendrecv(a(2:ma-1,na-1), ma-2, datatype, highy, 2, &
                     a(2:ma-1,1),    ma-2, datatype, lowy,  2, &
                     comm, MPI_STATUS_IGNORE, ierror)
   ! send to highx, receive from lowx
   call MPI_Sendrecv(a(ma-1,:),      na,   datatype, highx, 3, &
                     a(1,:),         na,   datatype, lowx,  3, &
                     comm, MPI_STATUS_IGNORE, ierror)
   ! send to lowx, receive from highx
   call MPI_Sendrecv(a(2,:),         na,   datatype, lowx,  4, &
                     a(ma,:),        na,   datatype, highx, 4, &
                     comm, MPI_STATUS_IGNORE, ierror)
#else
   FAKE(kind(a)+lowy+highy+lowx+highx+comm)
#endif
end subroutine shift_borders_matrix_real8

end module general_mpi_module
#undef FAKE
#undef FAKESET