[Dart-dev] [5994] DART/branches/development/ensemble_manager: Added options to both transpose routines to do the transpose loops in either order (loop around sending_pe, loop around recv_pe)

nancy at ucar.edu
Tue Mar 12 14:50:18 MDT 2013


Revision: 5994
Author:   hkershaw
Date:     2013-03-12 14:50:17 -0600 (Tue, 12 Mar 2013)
Log Message:
-----------
Added options to both transpose routines to do the transpose loops in either order (loop around sending_pe, loop around recv_pe)
Added namelist options to select which loop order to use
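
As background for the log message above, here is a minimal standalone sketch (a hypothetical illustration only, not the committed code; the program name and the my_pe/num_pes values are made up for this example) of the two loop orders the new namelist options choose between: looping around the receiving PE, so all other tasks send to one receiver at a time, versus looping around the sending PE, so one task sends to all other receivers in turn.

program loop_order_sketch
   implicit none
   integer :: my_pe, num_pes, recv_pe, sending_pe

   my_pe   = 0     ! assumed for illustration
   num_pes = 4     ! assumed for illustration

   ! Loop around the receiving PE: all other tasks send to one receiver at a time.
   do recv_pe = 0, num_pes - 1
      if (my_pe == recv_pe) then
         print *, 'PE', my_pe, ': receive my piece from every other PE'
      else
         print *, 'PE', my_pe, ': send my piece to PE', recv_pe
      endif
   end do

   ! Loop around the sending PE: one task sends to all other receivers in turn.
   do sending_pe = 0, num_pes - 1
      if (my_pe == sending_pe) then
         print *, 'PE', my_pe, ': send my piece to every other PE'
      else
         print *, 'PE', my_pe, ': receive my piece from PE', sending_pe
      endif
   end do
end program loop_order_sketch

In the committed code, use_var2copy_rec_loop selects the loop order for the vars-to-copies transpose and use_copy2var_send_loop selects it for the copies-to-vars transpose; both default to .true.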

Modified Paths:
--------------
    DART/branches/development/ensemble_manager/ensemble_manager_mod.f90
    DART/branches/development/ensemble_manager/ensemble_manager_mod.nml

-------------- next part --------------
Modified: DART/branches/development/ensemble_manager/ensemble_manager_mod.f90
===================================================================
--- DART/branches/development/ensemble_manager/ensemble_manager_mod.f90	2013-03-11 22:35:47 UTC (rev 5993)
+++ DART/branches/development/ensemble_manager/ensemble_manager_mod.f90	2013-03-12 20:50:17 UTC (rev 5994)
@@ -86,11 +86,15 @@
 logical  :: single_restart_file_out = .true.
 ! Size of perturbations for creating ensembles when model won't do it
 real(r8) :: perturbation_amplitude  = 0.2_r8
+!HK options to change order of communication loops
+logical  :: use_copy2var_send_loop = .true.
+logical  :: use_var2copy_rec_loop = .true.
 
 namelist / ensemble_manager_nml / single_restart_file_in,  &
                                   single_restart_file_out, &
-                                  perturbation_amplitude
-
+                                  perturbation_amplitude,  &
+                                  use_copy2var_send_loop,  &
+                                  use_var2copy_rec_loop
 !-----------------------------------------------------------------
 
 contains
@@ -842,6 +846,9 @@
 ! updated version of the original routine.  here, all tasks send
 ! while 1 receives.  original was all tasks receiving while 1 sends.
 ! apparently the sends can overlap and the execution time is less.
+! HK Added namelist option (use_var2copy_rec_loop) to select the updated
+! or the original version of the routine.
+!   Default: use updated version
 
 type (ensemble_type), intent(inout)        :: ens_handle
 character (len=*),    intent(in), optional :: label
@@ -850,7 +857,7 @@
 real(r8), allocatable :: transfer_temp(:)
 integer               :: num_copies, num_vars, my_num_vars, my_num_copies
 integer               :: max_num_vars, max_num_copies, num_copies_to_receive
-integer               :: sending_pe, recv_pe, k, sv, num_vars_to_send
+integer               :: sending_pe, recv_pe, k, sv, num_vars_to_send, copy
 integer               :: global_ens_index
 
 ! only output if there is a label
@@ -879,52 +886,106 @@
 allocate(var_list(max_num_vars), transfer_temp(max_num_vars), &
    copy_list(max_num_copies))
 
-! Loop to give each pe a turn to receive its copies
-RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
-   ! If I'm the receiving pe, do this block
-   if(my_pe == recv_pe) then
+if ( use_var2copy_rec_loop .eqv. .true. ) then ! use updated version
 
-      ! Figure out what piece to receive from each other PE and receive it
-      RECEIVE_FROM_EACH: do sending_pe = 0, num_pes - 1
-         call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
+  ! Loop to give each pe a turn to receive its copies
+  RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
+     ! If I'm the receiving pe, do this block
+     if(my_pe == recv_pe) then
 
-         ! Loop to receive for each copy stored on my_pe
-         ALL_MY_COPIES: do k = 1, num_copies_to_receive 
+        ! Figure out what piece to receive from each other PE and receive it
+        RECEIVE_FROM_EACH: do sending_pe = 0, num_pes - 1
+           call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
 
-            global_ens_index = copy_list(k)
+           ! Loop to receive for each copy stored on my_pe
+           ALL_MY_COPIES: do k = 1, num_copies_to_receive
 
-            ! If sending_pe is receiving_pe, just copy
-            if(sending_pe == recv_pe) then
-               do sv = 1, my_num_vars
-                  ens_handle%copies(global_ens_index, sv) = ens_handle%vars(ens_handle%my_vars(sv), k)
-               end do
-            else
-               if (num_copies_to_receive > 0) then
-                  ! Otherwise, receive this part from the sending pe
-                  call receive_from(sending_pe, transfer_temp(1:my_num_vars)) 
+              global_ens_index = copy_list(k)
+
+              ! If sending_pe is receiving_pe, just copy
+              if(sending_pe == recv_pe) then
+                 do sv = 1, my_num_vars
+                    ens_handle%copies(global_ens_index, sv) = ens_handle%vars(ens_handle%my_vars(sv), k)
+                 end do
+              else
+                 if (num_copies_to_receive > 0) then
+                    ! Otherwise, receive this part from the sending pe
+                    call receive_from(sending_pe, transfer_temp(1:my_num_vars))
    
-                  ! Copy the transfer array to my local storage
-                  ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
-               endif
-            endif
-         end do ALL_MY_COPIES
-      end do RECEIVE_FROM_EACH
-   else
-      ! I'm the sending PE, figure out what vars of my copies I'll send.
-      call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
+                    ! Copy the transfer array to my local storage
+                    ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
+                 endif
+              endif
+           end do ALL_MY_COPIES
+        end do RECEIVE_FROM_EACH
+     else
+        ! I'm the sending PE, figure out what vars of my copies I'll send.
+        call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
        
-      do k = 1, my_num_copies
-         do sv = 1, num_vars_to_send
-            ! Have to use temp because %var section is not contiguous storage
-            transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
-         enddo
-         call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
-      end do
+        do k = 1, my_num_copies
+           do sv = 1, num_vars_to_send
+              ! Have to use temp because %var section is not contiguous storage
+              transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
+           enddo
+           call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
+        end do
       
-   endif
-end do RECEIVING_PE_LOOP
+     endif
+  end do RECEIVING_PE_LOOP
 
+else ! use older version
 
+  ! Loop to give each pe a turn to send its vars
+  SENDING_PE_LOOP: do sending_pe = 0, num_pes - 1
+    ! If I'm the sending pe, do this block
+    if(my_pe == sending_pe) then
+         ! Figure out what piece to send to each other PE and send it
+         SEND_TO_EACH: do recv_pe = 0, num_pes - 1
+           call get_var_list(num_vars, recv_pe, var_list, num_vars_to_send)
+
+           if (num_vars_to_send > 0) then
+             ! Loop to send these vars for each copy stored on my_pe
+             ALL_MY_COPIES_SEND_LOOP: do k = 1, my_num_copies
+
+                ! Fill up the transfer array
+                do sv = 1, num_vars_to_send
+                  transfer_temp(sv) = ens_handle%vars(var_list(sv), k)
+                end do
+
+               ! If sending_pe is receiving_pe, just copy
+               if(sending_pe == recv_pe) then
+                 global_ens_index = ens_handle%my_copies(k)
+                 ens_handle%copies(global_ens_index, :) = transfer_temp(1:num_vars_to_send)
+               else
+                 ! Otherwise, ship this off
+                 call send_to(recv_pe, transfer_temp(1:num_vars_to_send))
+               endif
+             end do ALL_MY_COPIES_SEND_LOOP
+           endif
+         end do SEND_TO_EACH
+
+    else
+       ! I'm not the sending PE, figure out what copies of my vars I'll receive from sending_pe
+        call get_copy_list(num_copies, sending_pe, copy_list, num_copies_to_receive)
+
+        do copy = 1, num_copies_to_receive
+          if (my_num_vars > 0) then
+            ! Have to use temp because %copies section is not contiguous storage
+            call receive_from(sending_pe, transfer_temp(1:my_num_vars))
+            ! Figure out which global ensemble member this is
+            global_ens_index = copy_list(copy)
+            ! Store this chunk in my local storage
+            ens_handle%copies(global_ens_index, :) = transfer_temp(1:my_num_vars)
+         endif
+        end do
+
+    endif
+
+  end do SENDING_PE_LOOP
+
+endif
+
+
 ! Free up the temporary storage
 deallocate(var_list, transfer_temp, copy_list)
 
@@ -952,6 +1013,9 @@
 integer               :: sending_pe, recv_pe, k, sv, copy, num_copies_to_send
 integer               :: global_ens_index
 
+!HK loop limit 
+integer               :: send_limit
+
 ! only output if there is a label
 if (present(label)) then
    call timestamp_message('copies_to_vars start: '//label, alltasks=.true.)
@@ -978,6 +1042,85 @@
 allocate(var_list(max_num_vars), transfer_temp(max_num_vars), &
    copy_list(max_num_copies))
 
+
+if (use_copy2var_send_loop .eqv. .true. ) then
+!HK Switched loop index from receiving_pe to sending_pe
+! Aim: to make the communication scale better on Yellowstone, as num_pes >> ens_size
+! For small numbers of tasks (32 or fewer) the receiving_pe loop may be faster.
+! Namelist option use_copy2var_send_loop can be used to select which
+! communication pattern to use
+!    Default: use sending_pe loop (use_copy2var_send_loop = .true.)
+
+SEND_LOOP: do sending_pe = 0, num_pes - 1
+
+     if (my_task_id() /= sending_pe ) then  
+
+        ! figure out what piece to receive from each other PE and receive it
+        !    note: num_vars_to_receive is 0 if I do not have an ensemble member
+        call get_var_list(num_vars, sending_pe, var_list, num_vars_to_receive)
+
+        ! Loop to receive these vars for each copy stored on my_pe
+        ALL_MY_COPIES_SEND_LOOP: do k = 1, my_num_copies
+
+          call receive_from(sending_pe, transfer_temp(1:num_vars_to_receive))
+
+          ! Copy the transfer array to my local storage
+          do sv = 1, num_vars_to_receive
+             ens_handle%vars(var_list(sv), k) = transfer_temp(sv)
+          enddo
+
+        enddo ALL_MY_COPIES_SEND_LOOP
+
+     endif
+
+     if (my_task_id() == sending_pe) then
+   
+         do recv_pe = 0, num_pes - 1 
+
+           ! I'm the sending PE, figure out what copies of my vars I'll send
+           call get_copy_list(num_copies, recv_pe, copy_list, num_copies_to_send)
+
+           send_copies: do copy = 1, num_copies_to_send
+
+               if (my_task_id() /= recv_pe ) then 
+ 
+                  if (my_num_vars > 0) then
+
+                   transfer_temp(1:my_num_vars) = ens_handle%copies(copy_list(copy), :)
+
+                  ! Have to use temp because %copies section is not contiguous storage
+                   call send_to(recv_pe, transfer_temp(1:my_num_vars))
+                  endif
+       
+               else
+
+                ! figure out what piece to receive from myself and receive it
+                call get_var_list(num_vars, sending_pe, var_list, num_vars_to_receive)
+
+                  do k = 1,  my_num_copies
+
+                    ! sending to yourself so just copy
+                    global_ens_index = ens_handle%my_copies(k)
+                    do sv = 1, num_vars_to_receive
+                      ens_handle%vars(var_list(sv), k) = ens_handle%copies(global_ens_index, sv)
+                    end do
+
+                 enddo
+
+               endif
+
+           enddo send_copies
+
+        enddo
+
+      endif
+
+
+ enddo SEND_LOOP
+
+else ! use old communication pattern
+
+
 ! Loop to give each pe a turn to receive its vars
 RECEIVING_PE_LOOP: do recv_pe = 0, num_pes - 1
    ! If I'm the receiving pe, do this block
@@ -1024,6 +1167,9 @@
    endif
 end do RECEIVING_PE_LOOP
 
+endif
+
+
 ! Free up the temporary storage
 deallocate(var_list, transfer_temp, copy_list)
 
@@ -1060,7 +1206,6 @@
 !--------------------------------------------------------------------------------
 
 subroutine compute_copy_mean_sd(ens_handle, start_copy, end_copy, mean_copy, sd_copy)
-
 ! Assumes that ens_handle%copies is current; each pe has all copies of subset of vars
 ! Computes the mean and sd of ensemble copies start_copy:end_copy and stores
 ! mean in copy mean_copy and sd in copy sd_copy.

Modified: DART/branches/development/ensemble_manager/ensemble_manager_mod.nml
===================================================================
--- DART/branches/development/ensemble_manager/ensemble_manager_mod.nml	2013-03-11 22:35:47 UTC (rev 5993)
+++ DART/branches/development/ensemble_manager/ensemble_manager_mod.nml	2013-03-12 20:50:17 UTC (rev 5994)
@@ -1,5 +1,8 @@
 &ensemble_manager_nml
    single_restart_file_in  = .true.,
    single_restart_file_out = .true.,
-   perturbation_amplitude  = 0.2  /
+   perturbation_amplitude  = 0.2,
+   use_copy2var_send_loop = .true.,
+   use_var2copy_rec_loop = .true. 
+ /
 
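Usage note (illustrative, not part of the commit): both new options default to .true., which keeps the updated loop orders. Below is a sketch of an input.nml entry, assuming the usual DART input.nml setup, that selects the original orderings instead, for example on small task counts where the in-code comment above suggests the receiving_pe loop for the copies-to-vars transpose may be faster:

&ensemble_manager_nml
   single_restart_file_in  = .true.,
   single_restart_file_out = .true.,
   perturbation_amplitude  = 0.2,
   use_copy2var_send_loop  = .false.,
   use_var2copy_rec_loop   = .false.
 /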

