<p><b>duda</b> 2009-09-14 14:41:43 -0600 (Mon, 14 Sep 2009)</p><p>Rewrite halo exchange routines to use non-blocking MPI calls. In addition to<br>
making these routines simpler, the new implementation should be faster on larger<br>
numbers of processors; for example, with nCells=655362 and 128 cpus on bluefire,<br>
halo exchanges during time integration are 2-3 times faster than before.<br>
<br>
M src/module_dmpar.F<br>
</p><hr noshade><pre><font color="gray">Modified: trunk/swmodel/src/module_dmpar.F
===================================================================
--- trunk/swmodel/src/module_dmpar.F        2009-09-11 17:25:09 UTC (rev 52)
+++ trunk/swmodel/src/module_dmpar.F        2009-09-14 20:41:43 UTC (rev 53)
@@ -26,6 +26,9 @@
integer :: nlist
integer, dimension(:), pointer :: list
type (exchange_list), pointer :: next
+ real (kind=RKIND), dimension(:), pointer :: rbuffer
+ integer, dimension(:), pointer :: ibuffer
+ integer :: reqID
end type exchange_list
@@ -841,6 +844,8 @@
sendListPtr % nlist = numToSend
allocate(sendListPtr % list(numToSend))
nullify(sendListPtr % next)
+! nullify(sendListPtr % rbuffer)
+! nullify(sendListPtr % ibuffer)
kk = 1
do j=1,nOwnedList
if (recipientList(j) /= -1) then
@@ -874,6 +879,8 @@
recvListPtr % nlist = numToRecv
allocate(recvListPtr % list(numToRecv))
nullify(recvListPtr % next)
+! nullify(recvListPtr % rbuffer)
+! nullify(recvListPtr % ibuffer)
kk = 1
do j=1,nNeededList
if (ownerListIn(j) == -i) then
@@ -1592,129 +1599,55 @@
type (exchange_list), pointer :: sendList, recvList
type (exchange_list), pointer :: sendListPtr, recvListPtr
- integer :: i, j, istart, nLoop, leftNeighbor, rightNeighbor
- integer :: nSendBuf, nRecvBuf, lastPackedIdx, lastUnpackedIdx, nPacked, nUnpacked
- real (kind=RKIND), allocatable, dimension(:) :: buffer
- logical :: sendFirst
+ integer :: lastPackedIdx, lastUnpackedIdx, nPacked, nUnpacked
integer :: mpi_ierr
- integer :: d1, d2
+ integer :: d2
#ifdef _MPI
- nLoop = dminfo % nprocs / 2
- d1 = dim1
- d2 = dim2
-
- allocate(buffer(BUFSIZE))
-
+ recvListPtr => recvList
+ do while (associated(recvListPtr))
+ if (recvListPtr % procID /= dminfo % my_proc_id) then
+ d2 = dim1 * recvListPtr % nlist
+ allocate(recvListPtr % rbuffer(d2))
+ call MPI_Irecv(recvListPtr % rbuffer, d2, MPI_REALKIND, &
+ recvListPtr % procID, recvListPtr % procID, dminfo % comm, recvListPtr % reqID, mpi_ierr)
+ end if
+ recvListPtr => recvListPtr % next
+ end do
+
sendListPtr => sendList
do while (associated(sendListPtr))
- if (sendListPtr % procID == dminfo % my_proc_id) exit
+ if (sendListPtr % procID /= dminfo % my_proc_id) then
+ d2 = dim1 * sendListPtr % nlist
+ allocate(sendListPtr % rbuffer(d2))
+ call packSendBuf2dReal(1, dim1, dim2, array, sendListPtr, 1, d2, sendListPtr % rbuffer, nPacked, lastPackedIdx)
+ call MPI_Isend(sendListPtr % rbuffer, d2, MPI_REALKIND, &
+ sendListPtr % procID, dminfo % my_proc_id, dminfo % comm, sendListPtr % reqID, mpi_ierr)
+ end if
sendListPtr => sendListPtr % next
end do
recvListPtr => recvList
do while (associated(recvListPtr))
- if (recvListPtr % procID == dminfo % my_proc_id) exit
+ if (recvListPtr % procID /= dminfo % my_proc_id) then
+ call MPI_Wait(recvListPtr % reqID, MPI_STATUS_IGNORE, mpi_ierr)
+ d2 = dim1 * recvListPtr % nlist
+ call unpackRecvBuf2dReal(1, dim1, dim2, array, recvListPtr, 1, d2, recvListPtr % rbuffer, nUnpacked, lastUnpackedIdx)
+ deallocate(recvListPtr % rbuffer)
+ end if
recvListPtr => recvListPtr % next
end do
-
- do i=1,nLoop
- if (mod(dminfo % my_proc_id / i, 2) /= 0) then
- sendFirst = .true.
- else
- sendFirst = .false.
+
+ sendListPtr => sendList
+ do while (associated(sendListPtr))
+ if (sendListPtr % procID /= dminfo % my_proc_id) then
+ call MPI_Wait(sendListPtr % reqID, MPI_STATUS_IGNORE, mpi_ierr)
+ deallocate(sendListPtr % rbuffer)
end if
-
- leftNeighbor = dminfo % my_proc_id - i + dminfo % nprocs
- leftNeighbor = mod(leftNeighbor, dminfo % nprocs)
- rightNeighbor = dminfo % my_proc_id + i
- rightNeighbor = mod(rightNeighbor, dminfo % nprocs)
-
- if (sendFirst) then
- if (isInList(leftNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf2dReal(1, d1, d2, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (isInList(leftNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf2dReal(1, d1, d2, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (leftNeighbor /= rightNeighbor) then
- if (isInList(rightNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf2dReal(1, d1, d2, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (isInList(rightNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf2dReal(1, d1, d2, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- end if
-
- else
- if (isInList(rightNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf2dReal(1, d1, d2, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (isInList(rightNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf2dReal(1, d1, d2, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (leftNeighbor /= rightNeighbor) then
- if (isInList(leftNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf2dReal(1, d1, d2, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (isInList(leftNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf2dReal(1, d1, d2, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- end if
-
- end if
+ sendListPtr => sendListPtr % next
end do
- deallocate(buffer)
#endif
end subroutine dmpar_exch_halo_field2dReal
@@ -1726,139 +1659,61 @@
type (dm_info), intent(in) :: dminfo
integer, intent(in) :: dim1, dim2, dim3
- real (kind=RKIND), dimension(dim1,dim2, dim3), intent(inout) :: array
+ real (kind=RKIND), dimension(dim1,dim2,dim3), intent(inout) :: array
type (exchange_list), pointer :: sendList, recvList
type (exchange_list), pointer :: sendListPtr, recvListPtr
- integer :: i, j, istart, nLoop, leftNeighbor, rightNeighbor
- integer :: nSendBuf, nRecvBuf, lastPackedIdx, lastUnpackedIdx, nPacked, nUnpacked
- real (kind=RKIND), allocatable, dimension(:) :: buffer
- logical :: sendFirst
+ integer :: lastPackedIdx, lastUnpackedIdx, nPacked, nUnpacked
integer :: mpi_ierr
- integer :: d1, d2, d3
+ integer :: d3
-
#ifdef _MPI
- nLoop = dminfo % nprocs / 2
- d1 = dim1
- d2 = dim2
- d3 = dim3
+ recvListPtr => recvList
+ do while (associated(recvListPtr))
+ if (recvListPtr % procID /= dminfo % my_proc_id) then
+ d3 = dim1 * dim2 * recvListPtr % nlist
+ allocate(recvListPtr % rbuffer(d3))
+ call MPI_Irecv(recvListPtr % rbuffer, d3, MPI_REALKIND, &
+ recvListPtr % procID, recvListPtr % procID, dminfo % comm, recvListPtr % reqID, mpi_ierr)
+ end if
+ recvListPtr => recvListPtr % next
+ end do
- allocate(buffer(BUFSIZE))
-
sendListPtr => sendList
do while (associated(sendListPtr))
- if (sendListPtr % procID == dminfo % my_proc_id) exit
+ if (sendListPtr % procID /= dminfo % my_proc_id) then
+ d3 = dim1 * dim2 * sendListPtr % nlist
+ allocate(sendListPtr % rbuffer(d3))
+ call packSendBuf3dReal(1, dim1, 1, dim2, dim3, array, sendListPtr, 1, d3, &
+ sendListPtr % rbuffer, nPacked, lastPackedIdx)
+ call MPI_Isend(sendListPtr % rbuffer, d3, MPI_REALKIND, &
+ sendListPtr % procID, dminfo % my_proc_id, dminfo % comm, sendListPtr % reqID, mpi_ierr)
+ end if
sendListPtr => sendListPtr % next
end do
recvListPtr => recvList
do while (associated(recvListPtr))
- if (recvListPtr % procID == dminfo % my_proc_id) exit
+ if (recvListPtr % procID /= dminfo % my_proc_id) then
+ call MPI_Wait(recvListPtr % reqID, MPI_STATUS_IGNORE, mpi_ierr)
+ d3 = dim1 * dim2 * recvListPtr % nlist
+ call unpackRecvBuf3dReal(1, dim1, 1, dim2, dim3, array, recvListPtr, 1, d3, &
+ recvListPtr % rbuffer, nUnpacked, lastUnpackedIdx)
+ deallocate(recvListPtr % rbuffer)
+ end if
recvListPtr => recvListPtr % next
end do
- do i=1,nLoop
- if (mod(dminfo % my_proc_id / i, 2) /= 0) then
- sendFirst = .true.
- else
- sendFirst = .false.
+ sendListPtr => sendList
+ do while (associated(sendListPtr))
+ if (sendListPtr % procID /= dminfo % my_proc_id) then
+ call MPI_Wait(sendListPtr % reqID, MPI_STATUS_IGNORE, mpi_ierr)
+ deallocate(sendListPtr % rbuffer)
end if
-
- leftNeighbor = dminfo % my_proc_id - i + dminfo % nprocs
- leftNeighbor = mod(leftNeighbor, dminfo % nprocs)
- rightNeighbor = dminfo % my_proc_id + i
- rightNeighbor = mod(rightNeighbor, dminfo % nprocs)
-
- if (sendFirst) then
- if (isInList(leftNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf3dReal(1, d1, 1, d2, d3, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (isInList(leftNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf3dReal(1, d1, 1, d2, d3, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, &
- lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (leftNeighbor /= rightNeighbor) then
- if (isInList(rightNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf3dReal(1, d1, 1, d2, d3, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, &
- lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (isInList(rightNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf3dReal(1, d1, 1, d2, d3, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- end if
-
- else
- if (isInList(rightNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf3dReal(1, d1, 1, d2, d3, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, &
- lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- if (isInList(rightNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf3dReal(1, d1, 1, d2, d3, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, rightNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, rightNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (leftNeighbor /= rightNeighbor) then
- if (isInList(leftNeighbor, sendList, sendListPtr)) then
- istart = 1
- do while (istart <= sendListPtr % nlist)
- call packSendBuf3dReal(1, d1, 1, d2, d3, array, sendListPtr, istart, BUFSIZE, buffer, nSendBuf, lastPackedIdx)
- call MPI_Send(nSendBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, mpi_ierr)
- call MPI_Send(buffer, nSendBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, mpi_ierr)
- istart = lastPackedIdx + 1
- end do
- end if
- if (isInList(leftNeighbor, recvList, recvListPtr)) then
- istart = 1
- do while (istart <= recvListPtr % nList)
- call MPI_Recv(nRecvBuf, 1, MPI_INTEGER, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call MPI_Recv(buffer, nRecvBuf, MPI_REALKIND, leftNeighbor, i, dminfo % comm, MPI_STATUS_IGNORE, mpi_ierr)
- call unpackRecvBuf3dReal(1, d1, 1, d2, d3, array, recvListPtr, istart, BUFSIZE, buffer, nUnpacked, &
- lastUnpackedIdx)
- istart = lastUnpackedIdx + 1
- end do
- end if
- end if
-
- end if
+ sendListPtr => sendListPtr % next
end do
- deallocate(buffer)
#endif
end subroutine dmpar_exch_halo_field3dReal
</font>
</pre>