<p><b>duda</b> 2009-09-11 11:25:09 -0600 (Fri, 11 Sep 2009)</p><p>Replace dmpar_get_owner_list with a simpler (and faster) implementation.<br>
<br>
M src/module_dmpar.F<br>
</p><hr noshade><pre><font color="gray">Modified: trunk/swmodel/src/module_dmpar.F
===================================================================
--- trunk/swmodel/src/module_dmpar.F        2009-09-09 22:24:22 UTC (rev 51)
+++ trunk/swmodel/src/module_dmpar.F        2009-09-11 17:25:09 UTC (rev 52)
@@ -779,32 +779,16 @@
type (exchange_list), pointer :: sendList
type (exchange_list), pointer :: recvList
- integer :: i, j, l, u, k, kk, totalSize, bufStart, bufEnd, needEnd
- integer, dimension(2,nOwnedList) :: ownedListSorted
- integer, dimension(2,nNeededList) :: neededListSorted
- integer, dimension(BUFSIZE) :: buffer
- integer, dimension(nNeededList) :: ownerList
+ integer :: i, j, k, kk
+ integer :: totalSize, nMesgRecv, nMesgSend, recvNeighbor, sendNeighbor, currentProc
+ integer :: numToSend, numToRecv
integer, dimension(nOwnedList) :: recipientList
+ integer, dimension(2,nOwnedList) :: ownedListSorted
+ integer, allocatable, dimension(:) :: ownerListIn, ownerListOut
type (exchange_list), pointer :: sendListPtr, recvListPtr
- integer :: numToSend, numToSendTotal, numToRecv
- logical :: needToRecv
- integer :: mpi_ierr
+ integer :: mpi_ierr, mpi_rreq, mpi_sreq
#ifdef _MPI
- do i=1,nOwnedList
- ownedListSorted(1,i) = ownedList(i)
- ownedListSorted(2,i) = i
- end do
- do i=1,nNeededList
- neededListSorted(1,i) = neededList(i)
- neededListSorted(2,i) = i
- end do
-
- call quicksort(nOwnedList, ownedListSorted)
- call quicksort(nNeededList, neededListSorted)
-
- ownerList(:) = -1
-
allocate(sendList)
allocate(recvList)
nullify(sendList % next)
@@ -812,228 +796,76 @@
sendListPtr => sendList
recvListPtr => recvList
- if (nOwnedList > nNeededList) then
- i = nOwnedList
- else
- i = nNeededList
- end if
- call MPI_Allreduce(i, totalSize, 1, MPI_INTEGER, MPI_MAX, dminfo % comm, mpi_ierr)
+ do i=1,nOwnedList
+ ownedListSorted(1,i) = ownedList(i)
+ ownedListSorted(2,i) = i
+ end do
+ call quicksort(nOwnedList, ownedListSorted)
- do i=0, dminfo % nprocs - 1
- buffer(:) = -1
+ call MPI_Allreduce(nNeededList, totalSize, 1, MPI_INTEGER, MPI_MAX, dminfo % comm, mpi_ierr)
- if (dminfo % my_proc_id /= i) then
+ allocate(ownerListIn(totalSize))
+ allocate(ownerListOut(totalSize))
- bufStart = 1
- numToSendTotal = 0
- recipientList(:) = -1
- do while (bufStart < totalSize)
- bufEnd = min(bufStart + BUFSIZE - 1, totalSize)
- call MPI_Bcast(buffer, BUFSIZE, MPI_INTEGER, i, dminfo % comm, mpi_ierr)
- numToSend = 0
- ! OPTIMIZATION: Could sorted arrays somehow help? Could make setting recipientList more difficult, though.
- do j=1, BUFSIZE
+ nMesgRecv = nNeededList
+ ownerListIn(1:nNeededList) = neededList(1:nNeededList)
- l = 1
- u = nOwnedList
- k = (l+u)/2
- do while (u >= l)
- if (ownedListSorted(1,k) == buffer(j)) then
- buffer(j) = dminfo % my_proc_id
- numToSend = numToSend + 1
-if (ownedListSorted(2,k) < 1 .or. ownedListSorted(2,k) > nOwnedList) write(0,*) 'ANOTHER OOPS PLACE IN THE CODE.......'
- recipientList(ownedListSorted(2,k)) = numToSend + numToSendTotal
- exit
- else if (ownedListSorted(1,k) < buffer(j)) then
- l = k + 1
- k = (l+u)/2
- else
- u = k - 1
- k = (l+u)/2
- end if
- end do
- if (u < l) buffer(j) = -1
+ recvNeighbor = mod(dminfo % my_proc_id + dminfo % nprocs - 1, dminfo % nprocs)
+ sendNeighbor = mod(dminfo % my_proc_id + 1, dminfo % nprocs)
-! do k=1, nOwnedList
-! if (ownedList(k) == buffer(j)) then
-! buffer(j) = dminfo % my_proc_id
-! numToSend = numToSend + 1
-! recipientList(k) = numToSend + numToSendTotal
-! exit
-! end if
-! end do
+ do i=1, dminfo % nprocs
-! if (k > nOwnedList) buffer(j) = -1
- end do
+ recipientList(:) = -1
+ numToSend = 0
- if (numToSend > 0) call MPI_Send(buffer, BUFSIZE, MPI_INTEGER, i, dminfo % my_proc_id, dminfo % comm, mpi_ierr)
- bufStart = bufEnd + 1
- numToSendTotal = numToSendTotal + numToSend
- end do
-
- if (numToSendTotal > 0) then
- allocate(sendListPtr % next)
- sendListPtr => sendListPtr % next
- sendListPtr % procID = i
- sendListPtr % nlist = numToSendTotal
- allocate(sendListPtr % list(numToSendTotal))
- nullify(sendListPtr % next)
- kk = 1
- do j=1,nOwnedList
- if (recipientList(j) > 0) then
-if (recipientList(j) < 1 .or. recipientList(j) > numToSendTotal) then
- write(0,*) 'dminfo_get_owner_list: We have a bad list index from recipientList'
- call dmpar_abort(dminfo)
-end if
- sendListPtr % list(recipientList(j)) = j
- kk = kk + 1
- end if
- end do
-if (kk /= numToSendTotal + 1) then
- write(0,*) 'dminfo_get_owner_list: Seem to have fewer to send than we thought.'
- call dmpar_abort(dminfo)
-end if
- end if
-
- else
-
- j = 1
- k = 1
- do while (j <= nOwnedList .and. k <= nNeededList)
- if (ownedListSorted(1,j) == neededListSorted(1,k)) then
-if (neededListSorted(2,k) < 1 .or. neededListSorted(2,k) > nNeededList) write(0,*) 'OOPS -- our setup seems to have a bug.'
- ownerList(neededListSorted(2,k)) = dminfo % my_proc_id
- j = j + 1
- k = k + 1
- else if (ownedListSorted(1,j) < neededListSorted(1,k)) then
- j = j + 1
+ currentProc = mod(dminfo % my_proc_id + dminfo % nprocs - i + 1, dminfo % nprocs)
+ do j=1,nMesgRecv
+ if (ownerListIn(j) > 0) then
+ k = binary_search(ownedListSorted, 2, 1, nOwnedList, ownerListIn(j))
+ if (k <= nOwnedList) then
+ ownerListOut(j) = -1 * dminfo % my_proc_id
+ numToSend = numToSend + 1
+ recipientList(ownedListSorted(2,k)) = numToSend
else
- k = k + 1
+ ownerListOut(j) = ownerListIn(j)
end if
- end do
-
-! do j=1, nOwnedList
-! do k=1, nNeededList
-! if (neededList(k) == ownedList(j)) then
-! ownerList(k) = dminfo % my_proc_id
-! exit
-! end if
-! end do
-! end do
+ else
+ ownerListOut(j) = ownerListIn(j)
+ end if
+ end do
- bufStart = 1
- do while (bufStart < totalSize)
- bufEnd = min(bufStart + BUFSIZE - 1, totalSize)
- needEnd = min(bufEnd, nNeededList)
- kk = min(BUFSIZE, nNeededList - bufStart + 1)
- kk = max(kk, 0)
-
- if (kk == 0) then
- buffer(1:BUFSIZE) = -1
- else
- buffer(1:kk) = neededList(bufStart:needEnd)
- buffer(kk+1:BUFSIZE) = -1
+ if (numToSend > 0) then
+ allocate(sendListPtr % next)
+ sendListPtr => sendListPtr % next
+ sendListPtr % procID = currentProc
+ sendListPtr % nlist = numToSend
+ allocate(sendListPtr % list(numToSend))
+ nullify(sendListPtr % next)
+ kk = 1
+ do j=1,nOwnedList
+ if (recipientList(j) /= -1) then
+ sendListPtr % list(recipientList(j)) = j
+ kk = kk + 1
end if
-
- call MPI_Bcast(buffer, BUFSIZE, MPI_INTEGER, i, dminfo % comm, mpi_ierr)
-
- ! OPTIMIZATION: Maybe needToRecv could be set based on a running total of how many we have
- ! filled so far, if ownerList were to include items owned by self
-! needToRecv = .false.
- numToRecv = 0
- do j=bufStart, needEnd
- if (ownerList(j) == -1) then
- numToRecv = numToRecv + 1
-! needToRecv = .true.
-! exit
- end if
- end do
-
-! do while (needToRecv)
- do while (numToRecv > 0)
- call MPI_Recv(buffer, BUFSIZE, MPI_INTEGER, MPI_ANY_SOURCE, MPI_ANY_TAG, dminfo % comm, &
- MPI_STATUS_IGNORE, mpi_ierr)
- do j=bufStart, needEnd
- if (buffer(j-bufStart+1) /= -1) then
- ownerList(j) = buffer(j-bufStart+1)
- numToRecv = numToRecv - 1
- end if
- end do
-
-! needToRecv = .false.
-! do j=bufStart, needEnd
-! if (ownerList(j) == -1) then
-! needToRecv = .true.
-! exit
-! end if
-! end do
- end do
- bufStart = bufEnd + 1
end do
-
end if
+ nMesgSend = nMesgRecv
+ call MPI_Irecv(nMesgRecv, 1, MPI_INTEGER, recvNeighbor, i, dminfo % comm, mpi_rreq, mpi_ierr)
+ call MPI_Isend(nMesgSend, 1, MPI_INTEGER, sendNeighbor, i, dminfo % comm, mpi_sreq, mpi_ierr)
+ call MPI_Wait(mpi_rreq, MPI_STATUS_IGNORE, mpi_ierr)
+ call MPI_Wait(mpi_sreq, MPI_STATUS_IGNORE, mpi_ierr)
+ call MPI_Irecv(ownerListIn, nMesgRecv, MPI_INTEGER, recvNeighbor, i, dminfo % comm, mpi_rreq, mpi_ierr)
+ call MPI_Isend(ownerListOut, nMesgSend, MPI_INTEGER, sendNeighbor, i, dminfo % comm, mpi_sreq, mpi_ierr)
+ call MPI_Wait(mpi_rreq, MPI_STATUS_IGNORE, mpi_ierr)
+ call MPI_Wait(mpi_sreq, MPI_STATUS_IGNORE, mpi_ierr)
end do
- numToSend = 0
- do j=1,nNeededList
- if (ownerList(j) == dminfo % my_proc_id) numToSend = numToSend + 1
- end do
- if (numToSend > 0) then
- allocate(sendListPtr % next)
- sendListPtr => sendListPtr % next
- sendListPtr % procID = dminfo % my_proc_id
- sendListPtr % nlist = numToSend
- allocate(sendListPtr % list(numToSend))
- nullify(sendListPtr % next)
- kk = 1
-
- do j=1,nNeededList
- if (ownerList(j) == dminfo % my_proc_id) then
- l = 1
- u = nOwnedList
- k = (l+u)/2
- do while (u >= l)
- if (ownedListSorted(1,k) == neededList(j)) then
- sendListPtr % list(kk) = ownedListSorted(2,k)
- kk = kk + 1
- exit
- else if (ownedListSorted(1,k) < neededList(j)) then
- l = k + 1
- k = (l+u)/2
- else
- u = k - 1
- k = (l+u)/2
- end if
- end do
- end if
- end do
-
-! do j=1,nNeededList
-! do k=1,nOwnedList
-! if (ownedList(k) == neededList(j)) then
-!if (kk > numToSend) then
-! write(0,*) 'dminfo_get_owner_list: We are confused about how many to send to ourself.'
-! call dmpar_abort(dminfo)
-!end if
-! sendListPtr % list(kk) = k
-! kk = kk + 1
-! exit
-! end if
-! end do
-! end do
-if (kk /= numToSend + 1) then
- write(0,*) 'dminfo_get_owner_list: Seem to have fewer to send to self than we thought.',kk,numToSend+1
- call dmpar_abort(dminfo)
-end if
- end if
-
-
do i=0, dminfo % nprocs - 1
numToRecv = 0
do j=1,nNeededList
- if (ownerList(j) == i) numToRecv = numToRecv + 1
+ if (ownerListIn(j) == -i) numToRecv = numToRecv + 1
end do
if (numToRecv > 0) then
allocate(recvListPtr % next)
@@ -1044,23 +876,18 @@
nullify(recvListPtr % next)
kk = 1
do j=1,nNeededList
- if (ownerList(j) == i) then
-if (kk > numToRecv) then
- write(0,*) 'dminfo_get_owner_list: Seem to have more to receive than we thought.'
- call dmpar_abort(dminfo)
-end if
+ if (ownerListIn(j) == -i) then
recvListPtr % list(kk) = j
kk = kk + 1
end if
end do
-if (kk /= numToRecv + 1) then
- write(0,*) 'dminfo_get_owner_list: Seem to have fewer to receive than we thought.'
- call dmpar_abort(dminfo)
-end if
- end if
+ end if
end do
+ deallocate(ownerListIn)
+ deallocate(ownerListOut)
+
sendListPtr => sendList
sendList => sendList % next
deallocate(sendListPtr)
@@ -1069,7 +896,6 @@
recvList => recvList % next
deallocate(recvListPtr)
- call MPI_Barrier(dminfo % comm, mpi_ierr)
#else
allocate(recvList)
recvList % procID = dminfo % my_proc_id
</font>
</pre>