[Dart-dev] [3649] DART/trunk/models/cam/full_experiment/job_mpi.csh: From Kevin:

nancy at ucar.edu nancy at ucar.edu
Fri Nov 7 16:21:03 MST 2008


An HTML attachment was scrubbed...
URL: http://mailman.ucar.edu/pipermail/dart-dev/attachments/20081107/4dcd88ff/attachment.html
-------------- next part --------------
Modified: DART/trunk/models/cam/full_experiment/job_mpi.csh
===================================================================
--- DART/trunk/models/cam/full_experiment/job_mpi.csh	2008-11-07 23:19:55 UTC (rev 3648)
+++ DART/trunk/models/cam/full_experiment/job_mpi.csh	2008-11-07 23:21:03 UTC (rev 3649)
@@ -93,7 +93,7 @@
 #BSUB -o job_mpi.%J.log
 #BSUB -P xxxxxxxx
 #BSUB -q share
-#BSUB -W 0:30
+#BSUB -W 0:10
 #BSUB -n 1
 #BSUB -R "span[ptile=1]"
 #
@@ -146,12 +146,10 @@
 
    # Bluefire
    #set run_command = 'setenv TARGET_CPU_LIST "-1"; mpirun.lsf /usr/local/bin/launch '
-   # Wei's suggestion to fix ntbl windows problems
-   #set run_command = '/contrib/mpiruns/mpirun.lsf '
    # For multi-thread   
    set run_command = 'mpirun.lsf '
    # For single-thread (?)
-   #set run_command = ' '
+   # set run_command = ' '
 
 
 # Doesn't work with complicated bluefire run_command
@@ -299,22 +297,32 @@
 
 # Location of input observation files
 set obs_seq_root = ${CENTRALDIR}/obs_seq2006
+# More automatic way to find obs_seq.out files, if they're named consistently:
+# set month = $mo
+# if ($mo < 10) set month = 0$mo
+# set obs_seq_root = /ptmp/dart/Obs_sets/ACARS_24_ascii/${year}${month}/obs_seq${year}
 
 # DART source code directory trunk, and CAM interface location.
 set DARTDIR =     ~${user}/DART
 set DARTCAMDIR =   ${DARTDIR}/models/cam
 
 # The maximum number of processors that will be used by 
-# the $exp_#.script jobs spawned by this script.  
-# (FV core jobs may use less, depending on the domain decomposition)
-# (async = 2 jobs may need to use less, if memory is a constraint for
-#  having so many CAMs running on 1 node.  See LSB_PJL_TASK_GEOMETRY.
-#  On IBM Power5 parallel_cam = false will require using less than 16 procs/node 
-#  for FV1.9x2.5 and higher resol. )
-# ptile is the number of processors/node on this machine.  
-# It has no bearing on whether CAM is MPI or not, as long as filter is MPI.
-set max_num_procs = 80
-set ptile = 16
+#   the $exp_#.script jobs spawned by this script (max_num_procs).
+# ptile is the number of processors/node to use.
+#   It has no bearing on whether CAM is MPI or not, as long as filter is MPI.
+#   FV core jobs may automatically use less, depending on the domain decomposition; 
+#      see keep_lev_blocks.
+#   async = 2 jobs may need to use ptile < #_procs/node, if memory is a constraint for
+#     having so many CAMs running on 1 node.  Also see LSB_PJL_TASK_GEOMETRY.
+#     This is not automatic.
+#     On IBM Power5 parallel_cam = false will require using < 28 (of 32) procs/node 
+#     for FV1.9x2.5 and higher resolutions.
+# It's most efficient to run CAM single-threaded on at least as many processors
+#   as there are ensemble members.  The CAM startup is single-threaded, and the
+#   forecasts (the multi-threaded part of CAM) are short.
+# For 80 members of FV1.9x2.5 on bluefire:
+set max_num_procs = 81
+set ptile = 27
 
 # accounting code used for batch jobs (if no accounting needed, you may need
 # to remove the -P lines in the script generation sections below.
@@ -907,7 +915,7 @@
    # Commented out to use real SSTs (in CAM_src)
    if ($obs_seq_1 == $obs_seq_first) then
       ${REMOVE} namelistin
-      ${LINK} ${CAM_src}/namelistin namelistin
+      ${COPY} ${CAM_src}/namelistin namelistin
       sleep 1 
    endif
    if (! -e namelistin ) then
@@ -1101,15 +1109,6 @@
    echo '        set from = filter_ic_new*[.0]$n'                                     >> ${job_i}
    echo "        set dest = ${out_full}/DART/filter_ic."'$from:e'                     >> ${job_i}
 
-   # stuff analyses into CAM initial files using 
-   # echo " " >> ${job_i}
-   # echo '      echo "$from >\! member "'                                              >> ${job_i}
-   # echo '      echo "caminput_${n}.nc >> member "'                                    >> ${job_i}
-   # echo "      ./trans_sv_pv "                                                        >> ${job_i}
-   # echo '      echo "stuffing analyses from $from into caminput_${n}.nc >> $MASTERLOG ' >> ${job_i}
-   # echo '      ls -l caminput_${n}.nc >> $MASTERLOG   '                               >> ${job_i}
-   # end stuffing 
-
    echo " " >> ${job_i}
    echo "        ${MOVE} "'$from $dest'                                               >> ${job_i}
    echo "        if (! "'$status'" == 0 ) then "                                      >> ${job_i}
@@ -1145,7 +1144,7 @@
 
    echo " "                                                                           >> ${job_i}
    echo '   if ( -e $CAMINPUT && ! -z $CAMINPUT) then '                               >> ${job_i}
-   echo "      ${MOVE} "'$CAMINPUT'" ${out_full}/CAM  "                               >> ${job_i}
+   echo "      ${COPY} "'$CAMINPUT'" ${out_full}/CAM  "                               >> ${job_i}
    echo '      if (! $status == 0 ) then '                                            >> ${job_i}
    echo '         echo "failed moving ${CENTRALDIR}/$CAMINPUT " '                     >> ${job_i}
    echo "         $KILLCOMMAND "                                                      >> ${job_i}
@@ -1159,7 +1158,7 @@
 
    echo " "                                                                           >> ${job_i}
    echo '   if ( -e $CLMINPUT && ! -z $CLMINPUT) then '                               >> ${job_i}
-   echo "      ${MOVE} "'$CLMINPUT'" ${out_full}/CLM "                                >> ${job_i}
+   echo "      ${COPY} "'$CLMINPUT'" ${out_full}/CLM "                                >> ${job_i}
    echo '      if (! $status == 0 ) then '                                            >> ${job_i}
    echo '         echo "failed moving ${CENTRALDIR}/$CLMINPUT " '                     >> ${job_i}
    echo "         $KILLCOMMAND "                                                      >> ${job_i}
@@ -1175,36 +1174,12 @@
    echo "   @ n++ "                                                                   >> ${job_i}
    echo "end "                                                                        >> ${job_i}
 
-# save the CLM initial files from intermediate times, for analyses in CAM initial file format
+# Now safe to move H{06,12,18,24} to out_full for later archiving use
+   echo 'foreach Hdir (`ls -d H[0-9]*`)'                                              >> ${job_i}
+   echo "   ${MOVE} "'$Hdir '"${out_full} "                                            >> ${job_i}
+   echo "end "                                                                        >> ${job_i}
    echo " "                                                                           >> ${job_i}
-   echo 'foreach clm (`ls clm_init_memb*.nc`) '                                       >> ${job_i}
-   echo '   if (! -z $clm) then '                                                     >> ${job_i}
-   echo "      ${MOVE} "'$clm'" ${out_full}"                                          >> ${job_i}
-   echo '      if (! $status == 0 ) then '                                            >> ${job_i}
-   echo '         echo "failed moving ${CENTRALDIR}/$clm " '                          >> ${job_i}
-   echo "         $KILLCOMMAND "                                                      >> ${job_i}
-   echo "      endif "                                                                >> ${job_i}
-   echo "   else "                                                                    >> ${job_i}
-   echo '      echo "failed moving ${CENTRALDIR}/$clm because of size 0" '            >> ${job_i}
-   echo "   endif "                                                                   >> ${job_i}
-   echo "end"                                                                         >> ${job_i}
-   echo " "                                                                           >> ${job_i}
-# save the CAM initial files from intermediate times, for analyses in CAM initial file format
-   echo " "                                                                           >> ${job_i}
-   echo 'foreach cam (`ls cam_init_memb*.nc`) '                                       >> ${job_i}
-   echo '   if (! -z $cam) then '                                                     >> ${job_i}
-   echo "      ${MOVE} "'$cam'" ${out_full}"                                          >> ${job_i}
-   echo '      if (! $status == 0 ) then '                                            >> ${job_i}
-   echo '         echo "failed moving ${CENTRALDIR}/$cam " '                          >> ${job_i}
-   echo "         $KILLCOMMAND "                                                      >> ${job_i}
-   echo "      endif "                                                                >> ${job_i}
-   echo "   else "                                                                    >> ${job_i}
-   echo '      echo "failed moving ${CENTRALDIR}/$cam because of size 0" '            >> ${job_i}
-   echo "   endif "                                                                   >> ${job_i}
-   echo "end"                                                                         >> ${job_i}
-   echo " "                                                                           >> ${job_i}
 
-
    echo "if (! -e times  && ! -e ${out_full}/CAM/caminput_1.nc) then  "               >> ${job_i}
    echo "   # There may have been no advance; "                                       >> ${job_i}
    echo "   # use the CAM_ics_1 as the CAM ics for this time  "                       >> ${job_i}
@@ -1241,10 +1216,7 @@
       echo "cd $out_prev "                                                             >> ${job_i}
 
       if (-e auto_diag2ms_LSF.csh) then
-#     Diagnostics file
-#     Need to do this before handling restarts because mean2cam_init needs clminput_1.nc
-#     Change CLM initial file strategy; they (*$i* vertions) are saved during model advance
-#        and should already be in $out_prev
+#     Diagnostics files
          echo "bsub < ../../auto_diag2ms_LSF.csh  >>& " ' $MASTERLOG '                 >> ${job_i}
          echo 'echo "job '$i'; Backing up diagnostics '${out_prev}' >> $MASTERLOG"'    >> ${job_i}
          echo 'echo "    to mass store in separate batch job"  >> $MASTERLOG  '        >> ${job_i}
@@ -1288,7 +1260,7 @@
    echo "${MOVE} ${job_i}            ${out_full} "                           >> ${job_i}
    echo "${MOVE} cam_out_temp1       ${out_full} "                           >> ${job_i}
    echo "${MOVE} namelist            ${exp}"                                 >> ${job_i}
-   echo "${REMOVE} cam_out_temp* *_ud* *_ic[0-9]* *_ic_old* "                >> ${job_i}
+   echo "${REMOVE} cam_out_temp* *_ud.* *_ic[0-9]* *_ic_old* "                >> ${job_i}
 # It's really stupid that this doesn't work on blueX, even with nonomatch
 #   echo "set nonomatch"                                                     >> ${job_i}
 #   if ($i != $obs_seq_first) then
@@ -1297,9 +1269,9 @@
 #      echo "if (-e ${exp}_${j}.*.log) ${MOVE} ${exp}_${j}.*.log  ${out_prev} " >> ${job_i}
 #   endif
 # SO,
-   echo "ls -1 ${exp}_${j}.*.log > logs"                                         >> ${job_i}
-   echo "set num_logs = `wc -l logs`"                                            >> ${job_i}
-   echo 'if ($num_logs[1] > 0)' "${MOVE} ${exp}_${j}.*.log  ${out_prev} "        >> ${job_i}
+   echo "ls -1 ${exp}_${j}"'.*.log >! logs'                                      >> ${job_i}
+   echo 'set num_logs = `wc -l logs`'                                            >> ${job_i}
+   echo 'if ($num_logs[1] > 0)' "${MOVE} ${exp}_${j}"'.*.log'"  ${out_prev} "    >> ${job_i}
    echo "rm logs"                                                                >> ${job_i}
 # END of trying to move log files.  Phew.
 


More information about the Dart-dev mailing list