[Dart-dev] [5994] DART/branches/development/ensemble_manager: Added options to both transpose routines to do the transpose loops in either order (loop around sending_pe, loop around recv_pe)
nancy at ucar.edu
nancy at ucar.edu
Tue Mar 12 14:50:18 MDT 2013
Revision: 5994
Author: hkershaw
Date: 2013-03-12 14:50:17 -0600 (Tue, 12 Mar 2013)
Log Message:
-----------
Added options to both transpose routines to do the transpose loops in either order (loop around sending_pe, loop around recv_pe)
Added namelist options to select which loop order to use
Modified Paths:
--------------
DART/branches/development/ensemble_manager/ensemble_manager_mod.f90
DART/branches/development/ensemble_manager/ensemble_manager_mod.nml
-------------- next part --------------
Modified: DART/branches/development/ensemble_manager/ensemble_manager_mod.f90
===================================================================
--- DART/branches/development/ensemble_manager/ensemble_manager_mod.f90 2013-03-11 22:35:47 UTC (rev 5993)
+++ DART/branches/development/ensemble_manager/ensemble_manager_mod.f90 2013-03-12 20:50:17 UTC (rev 5994)
@@ -86,11 +86,15 @@
logical :: single_restart_file_out = .true.
! Size of perturbations for creating ensembles when model won't do it
real(r8) :: perturbation_amplitude = 0.2_r8
+!HK options to change order of communiation loops
+logical :: use_copy2var_send_loop = .true.
+logical :: use_var2copy_rec_loop = .true.
namelist / ensemble_manager_nml / single_restart_file_in, &
single_restart_file_out, &
- perturbation_amplitude
-
+ perturbation_amplitude, &
+ use_copy2var_send_loop, &
+ use_var2copy_rec_loop
!-----------------------------------------------------------------
contains
@@ -842,6 +846,9 @@
! updated version of the original routine. here, all tasks send
! while 1 receives. original was all tasks receiving while 1 sends.
! apparently the sends can overlap and the execution time is less.
+! HK Added namelist option (use_var2copy_rec_loop) to select updated
+! or original version of the routine.
+! Default: use updated version
type (ensemble_type), intent(inout) :: ens_handle
character (len=*), intent(in), optional :: label
@@ -850,7 +857,7 @@
real(r8), allocatable :: transfer_temp(:)
integer :: num_copies, num_vars, my_num_vars, my_num_copies
integer :: max_num_vars, max_num_copies, num_copies_to_receive
-integer :: sending_pe, recv_pe, k, sv, num_vars_to_send
+integer :: sending_pe, recv_pe, k, sv, num_vars_to_send, copy
integer :: global_ens_index
! only output if there is a label
@@ -879,52 +886,106 @@
allocate(var_list(max_num_vars), transfer_temp(max_num_vars), &
copy_list(max_num_copies))
-! Loop to give each pe a turn to receive its copies
-RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
- ! If I'm the receiving pe, do this block
- if(my_pe == recv_pe) then
+if ( use_var2copy_rec_loop .eqv. .true. ) then ! use updated version
- ! Figure out what piece to receive from each other PE and receive it
- RECEIVE_FROM_EACH: do sending_pe = 0, num_pes - 1
- call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
+ ! Loop to give each pe a turn to receive its copies
+ RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
+ ! If I'm the receiving pe, do this block
+ if(my_pe == recv_pe) then
- ! Loop to receive for each copy stored on my_pe
- ALL_MY_COPIES: do k = 1, num_copies_to_receive
+ ! Figure out what piece to receive from each other PE and receive it
+ RECEIVE_FROM_EACH: do sending_pe = 0, num_pes - 1
+ call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
- global_ens_index = copy_list(k)
+ ! Loop to receive for each copy stored on my_pe
+ ALL_MY_COPIES: do k = 1, num_copies_to_receive
- ! If sending_pe is receiving_pe, just copy
- if(sending_pe == recv_pe) then
- do sv = 1, my_num_vars
- ens_handle%copies(global_ens_index, sv) = ens_handle%vars(ens_handle%my_vars(sv), k)
- end do
- else
- if (num_copies_to_receive > 0) then
- ! Otherwise, receive this part from the sending pe
- call receive_from(sending_pe, transfer_temp(1:my_num_vars))
+ global_ens_index = copy_list(k)
+
+ ! If sending_pe is receiving_pe, just copy
+ if(sending_pe == recv_pe) then
+ do sv = 1, my_num_vars
+ ens_handle%copies(global_ens_index, sv) = ens_handle%vars(ens_handle%my_vars(sv), k)
+ end do
+ else
+ if (num_copies_to_receive > 0) then
+ ! Otherwise, receive this part from the sending pe
+ call receive_from(sending_pe, transfer_temp(1:my_num_vars))
- ! Copy the transfer array to my local storage
- ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
- endif
- endif
- end do ALL_MY_COPIES
- end do RECEIVE_FROM_EACH
- else
- ! I'm the sending PE, figure out what vars of my copies I'll send.
- call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
+ ! Copy the transfer array to my local storage
+ ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
+ endif
+ endif
+ end do ALL_MY_COPIES
+ end do RECEIVE_FROM_EACH
+ else
+ ! I'm the sending PE, figure out what vars of my copies I'll send.
+ call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
- do k = 1, my_num_copies
- do sv = 1, num_vars_to_send
- ! Have to use temp because %var section is not contiguous storage
- transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
- enddo
- call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
- end do
+ do k = 1, my_num_copies
+ do sv = 1, num_vars_to_send
+ ! Have to use temp because %var section is not contiguous storage
+ transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
+ enddo
+ call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
+ end do
- endif
-end do RECEIVING_PE_LOOP
+ endif
+ end do RECEIVING_PE_LOOP
+else ! use older version
+ ! Loop to give each pe a turn to send its vars
+ SENDING_PE_LOOP: do sending_pe = 0, num_pes - 1
+ ! If I'm the sending pe, do this block
+ if(my_pe == sending_pe) then
+ ! Figure out what piece to send to each other PE and send it
+ SEND_TO_EACH: do recv_pe = 0, num_pes - 1
+ call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
+
+ if (num_vars_to_send > 0) then
+ ! Loop to send these vars for each copy stored on my_pe
+ ALL_MY_COPIES_SEND_LOOP: do k = 1, my_num_copies
+
+ ! Fill up the transfer array
+ do sv = 1, num_vars_to_send
+ transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
+ end do
+
+ ! If sending_pe is receiving_pe, just copy
+ if(sending_pe == recv_pe) then
+ global_ens_index = ens_handle%my_copies(k)
+ ens_handle%copies(global_ens_index, :) = transfer_temp(1:num_vars_to_send)
+ else
+ ! Otherwise, ship this off
+ call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
+ endif
+ end do ALL_MY_COPIES_SEND_LOOP
+ endif
+ end do SEND_TO_EACH
+
+ else
+ ! I'm not the sending PE, figure out what copies of my vars I'll receive from sending_pe
+ call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
+
+ do copy = 1, num_copies_to_receive
+ if (my_num_vars > 0) then
+ ! Have to use temp because %copies section is not contiguous storage
+ call receive_from(sending_pe, transfer_temp(1:my_num_vars))
+ ! Figure out which global ensemble member this is
+ global_ens_index = copy_list(copy)
+ ! Store this chunk in my local storage
+ ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
+ endif
+ end do
+
+ endif
+
+ end do SENDING_PE_LOOP
+
+endif
+
+
! Free up the temporary storage
deallocate(var_list, transfer_temp, copy_list)
@@ -952,6 +1013,9 @@
integer :: sending_pe, recv_pe, k, sv, copy, num_copies_to_send
integer :: global_ens_index
+!HK loop limit
+integer :: send_limit
+
! only output if there is a label
if (present(label)) then
call timestamp_message('copies_to_vars start: '//label, alltasks=.true.)
@@ -978,6 +1042,85 @@
allocate(var_list(max_num_vars), transfer_temp(max_num_vars), &
copy_list(max_num_copies))
+
+if (use_copy2var_send_loop .eqv. .true. ) then
+!HK Switched loop index from receiving_pe to sending_pe
+! Aim: to make the commication scale better on Yellowstone, as num_pes >> ens_size
+! For small numbers of tasks (32 or less) the recieving_pe loop may be faster.
+! Namelist option use_copy2var_send_loop can be used to select which
+! communication pattern to use
+! Default: use sending_pe loop (use_copy2var_send_loop = .true.)
+
+SEND_LOOP: do sending_pe = 0, num_pes - 1
+
+ if (my_task_id() /= sending_pe ) then
+
+ ! figure out what piece to recieve from each other PE and recieve it
+ ! note: num_vars_to_recive is 0 if I do not have an enemsble member
+ call get_var_list(num_vars, sending_pe, var_list, num_vars_to_receive)
+
+ ! Loop to receive these vars for each copy stored on my_pe
+ ALL_MY_COPIES_SEND_LOOP: do k = 1, my_num_copies
+
+ call receive_from(sending_pe, transfer_temp(1:num_vars_to_receive))
+
+ ! Copy the transfer array to my local storage
+ do sv = 1, num_vars_to_receive
+ ens_handle%vars(var_list(sv), k) = transfer_temp(sv)
+ enddo
+
+ enddo ALL_MY_COPIES_SEND_LOOP
+
+ endif
+
+ if (my_task_id() == sending_pe) then
+
+ do recv_pe = 0, num_pes - 1
+
+ ! I'm the sending PE, figure out what copies of my vars I'll send
+ call get_copy_list(num_copies, recv_pe, copy_list, num_copies_to_send)
+
+ send_copies: do copy = 1, num_copies_to_send
+
+ if (my_task_id() /= recv_pe ) then
+
+ if (my_num_vars > 0) then
+
+ transfer_temp(1:my_num_vars) = ens_handle%copies(copy_list(copy), :)
+
+ ! Have to use temp because %copies section is not contiguous storage
+ call send_to(recv_pe, transfer_temp(1:my_num_vars))
+ endif
+
+ else
+
+ ! figure out what piece to recieve from myself and recieve it
+ call get_var_list(num_vars, sending_pe, var_list, num_vars_to_receive)
+
+ do k = 1, my_num_copies
+
+ ! sending to yourself so just copy
+ global_ens_index = ens_handle%my_copies(k)
+ do sv = 1, num_vars_to_receive
+ ens_handle%vars(var_list(sv), k) = ens_handle%copies(global_ens_index, sv)
+ end do
+
+ enddo
+
+ endif
+
+ enddo send_copies
+
+ enddo
+
+ endif
+
+
+ enddo SEND_LOOP
+
+else ! use old communication pattern
+
+
! Loop to give each pe a turn to receive its vars
RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
! If I'm the receiving pe, do this block
@@ -1024,6 +1167,9 @@
endif
end do RECEIVING_PE_LOOP
+endif
+
+
! Free up the temporary storage
deallocate(var_list, transfer_temp, copy_list)
@@ -1060,7 +1206,6 @@
!--------------------------------------------------------------------------------
subroutine compute_copy_mean_sd(ens_handle, start_copy, end_copy, mean_copy, sd_copy)
-
! Assumes that ens_handle%copies is current; each pe has all copies of subset of vars
! Computes the mean and sd of ensemble copies start_copy:end_copy and stores
! mean in copy mean_copy and sd in copy sd_copy.
Modified: DART/branches/development/ensemble_manager/ensemble_manager_mod.nml
===================================================================
--- DART/branches/development/ensemble_manager/ensemble_manager_mod.nml 2013-03-11 22:35:47 UTC (rev 5993)
+++ DART/branches/development/ensemble_manager/ensemble_manager_mod.nml 2013-03-12 20:50:17 UTC (rev 5994)
@@ -1,5 +1,8 @@
&ensemble_manager_nml
single_restart_file_in = .true.,
single_restart_file_out = .true.,
- perturbation_amplitude = 0.2 /
+ perturbation_amplitude = 0.2,
+ use_copy2var_send_loop = .true.,
+ use_var2copy_rec_loop = .true.
+ /
More information about the Dart-dev
mailing list