[Dart-dev] [3445] DART/trunk/models/cam/full_experiment/job_mpi.csh:
Many updates to the main job script from Kevin:
nancy at ucar.edu
Tue Jul 8 12:49:16 MDT 2008
Modified: DART/trunk/models/cam/full_experiment/job_mpi.csh
===================================================================
--- DART/trunk/models/cam/full_experiment/job_mpi.csh 2008-07-08 18:46:42 UTC (rev 3444)
+++ DART/trunk/models/cam/full_experiment/job_mpi.csh 2008-07-08 18:49:16 UTC (rev 3445)
@@ -79,6 +79,13 @@
# excluded; cr0139en
# echo $nodelist
+# Better method for excluding one or more nodes:
+# #BSUB -R "select[hname != bl0605en]"
+# So set a wordlist here and use it below (differently from how nodelist was used)
+# set exclude_nodes = (be0512en)
+# echo "exclude_nodes = " $exclude_nodes
+
+
# -W hr:mn max wallclock time (required on some systems)
# -b [[mm:]dd:]hh:mm allow job to run only after this time.
##=============================================================================
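
As context for the exclude_nodes wordlist introduced above: later in this diff the per-obs_seq job scripts expand it into one BSUB select line per node. A minimal sketch of that expansion (node names and the job-script name are placeholders):

   # Sketch only: turn a wordlist of misbehaving nodes into BSUB select lines.
   set job_i = example_job.csh              # stand-in for the generated job script
   set exclude_nodes = (be0512en bl0605en)  # placeholder node names
   if ($?exclude_nodes) then
      foreach node ($exclude_nodes)
         echo '#BSUB -R "select[hname != '$node']" ' >> ${job_i}
      end
   endif
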
@@ -133,15 +140,28 @@
set CENTRALDIR = $LS_SUBCWD
set JOBNAME = $LSB_JOBNAME
-# for multi-thread
+ # Variable to abort further assimilations if too many archiving jobs are
+ # waiting in the queue, meaning that /ptmp is filling up.
+ set max_pend_archive = 5
+
+ # Bluefire
+ #set run_command = 'export TARGET_CPU_LIST="-1"; mpirun.lsf /usr/local/bin/launch '
+ # Wei's suggestion to fix ntbl windows problems
+ #set run_command = '/contrib/mpiruns/mpirun.lsf '
+ # For multi-thread
set run_command = 'mpirun.lsf '
-# for single-thread (?)
-# set run_command = ' '
- which $run_command
- if ($status != 0 && $run_command != ' ') then
- exit "run_command $run_command not found"
- endif
-# NOT USED on blueice
+ # For single-thread (?)
+ #set run_command = ' '
+
+
+# Doesn't work with complicated bluefire run_command
+ if ($run_command != ' ') then
+ which $run_command
+ if ($status != 0 ) then
+ exit "run_command $run_command not found"
+ endif
+ endif
+
alias submit ' bsub < \!* '
else if ($?PBS_O_WORKDIR) then
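
The max_pend_archive threshold set above is only the limit itself; the test that uses it is not in this hunk. A hypothetical sketch of such a test under LSF (the bjobs invocation and the "archive" job-name pattern are assumptions for illustration, not taken from this commit):

   # Hypothetical: count pending archiving jobs; if the backlog is large,
   # /ptmp is probably filling up, so stop submitting assimilation jobs.
   set n_pend = `bjobs -p -w | grep -c archive`
   if ($n_pend > $max_pend_archive) then
      echo "$n_pend archiving jobs pending (> $max_pend_archive); aborting" >> $MASTERLOG
      exit 40
   endif
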
@@ -221,12 +241,13 @@
echo "Initialized at "`date`
echo "CENTRALDIR is " $CENTRALDIR
-#========================================================================================
+#===============================================================================
# User set run parameters to change
# Directory where output will be kept (relative to '.')
set resol = FV1.9x2.5
+# If true, the directory where the CAM executable lives should end with '-mpi'
set parallel_cam = false
# Change this for each new experiment.
@@ -251,31 +272,33 @@
# freq < -15 look for the first of each month
set obs_seq_freq = 1
-# 'day'/obs_seq.out numbers to assimilate during this job
-# First and last obs seq files.
+# If there is a currently running job and the first batch of the new
+# jobs should wait for it, set this to true. If all jobs have exited
+# the queue, or this is the very first run of an experiment, set this to false.
+# (It makes the job submission depend/wait for the previously numbered
+# job to exit before starting.)
+
+set obs_seq_1_depend = false
+
+# 'day'/obs_seq.out numbers to assimilate during this batch of jobs.
+# First and last obs seq files for this run.
set obs_seq_1 = 1
set obs_seq_n = 7
-# if obs_seq_1 above is the very first obs_seq file for an entire
-# experiment, set the depend line below 'false'. if obs_seq_1 is
-# the sequence number of a new batch of jobs but is really part of
-# a longer series of seq files (e.g. the entire job is seq numbers
-# 1-30, but this script is only going to queue up jobs 6-12)
-# then it to 'true'. it controls whether it is going to clean out
-# old cam/clm input files, and whether it is going to queue up the
-# job to depend on the previous job step completing.
-set obs_seq_1_depend = false
-
-# The month of the obs_seq.out files for this run, and
-# the month of the first obs_seq.out file for this experiment.
-# This will be a misnomer for the spin-up run that has obs_seq files
+# Month number of first obs_seq.out of entire experiment.
+# All other runs will be counted from here.
+set mo_first = 7
+# The month and year of the obs_seq.out files for this batch of jobs.
+set mo = 7
+set year = 2007
+# These can be used differently for different values of obs_seq_freq,
+# as when doing a long spin-up run that has obs_seq files
# only at the first day of the month; the 'days' will refer to months
# and this mo can be thought of as the year (2001).
-set mo = 1
-set mo_first = 1
+
# Location of input observation files
-set obs_seq_root = ${CENTRALDIR}/obs_seq2003
+set obs_seq_root = ${CENTRALDIR}/obs_seq2006
# DART source code directory trunk, and CAM interface location.
set DARTDIR = ~${user}/DART
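
When obs_seq_1_depend is true (and for every job after the first in this batch), the generated job scripts carry an LSF dependency on the previously numbered job, built from previousjobnumber further down in the script. An illustrative form of such a directive (the job-name convention shown here is an assumption):

   # Illustrative only: make the job for obs_seq $i wait for the previous one.
   echo "#BSUB -w done(${exp}_${previousjobnumber})" >> ${job_i}
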
@@ -284,6 +307,10 @@
# The maximum number of processors that will be used by
# the $exp_#.script jobs spawned by this script.
# (FV core jobs may use less, depending on the domain decomposition)
+# (async = 2 jobs may need to use fewer, if memory is a constraint for
+# having so many CAMs running on 1 node. See LSB_PJL_TASK_GEOMETRY.
+# On IBM Power5, parallel_cam = false will require using fewer than 16 procs/node
+# for FV1.9x2.5 and higher resolutions.)
# ptile is the number of processors/node on this machine.
# It has no bearing on whether CAM is MPI or not, as long as filter is MPI.
set max_num_procs = 80
@@ -361,11 +388,14 @@
# set DART_ics_1 = ${CENTRALDIR}
# set CAM_ics_1 = ${CENTRALDIR}/caminput_
# set CLM_ics_1 = ${CENTRALDIR}/clminput_
- set DART_ics_1 = /ptmp/raeder/CAM_init/FV1.9x2.5_cam3.5/Jan_1/DART_MPI
- set CAM_ics_1 = /ptmp/raeder/CAM_init/FV1.9x2.5_cam3.5/Jan_1/CAM/caminput_
- set CLM_ics_1 = /ptmp/raeder/CAM_init/FV1.9x2.5_cam3.5/Jan_1/CLM/clminput_
- # -mpi will be attached to CAM_src if parallel_cam = true; don't add it here
- set CAM_src = /home/coral/raeder/Cam3/cam3.5.06/models/atm/cam/bld/FV1.9x2.5-O1
+ set DART_ics_1 = /ptmp/dart/CAM_init/FV1.9x2.5_cam3.5/Jul_1/DART_MPI
+ set CAM_ics_1 = /ptmp/dart/CAM_init/FV1.9x2.5_cam3.5/Jul_1/CAM/caminput_
+ set CLM_ics_1 = /ptmp/dart/CAM_init/FV1.9x2.5_cam3.5/Jul_1/CLM/clminput_
+ # -mpi will be attached to this name if parallel_cam = true; don't add it here
+ # set CAM_src = /blhome/raeder/Cam3/cam3.5/models/atm/cam/bld/FV1.9x2.5_ALT_PFT-O3
+ set CAM_src = /blhome/raeder/Cam3/cam3.5/models/atm/cam/bld/FV2deg_Tmix_fire
+ # set CAM_src = /blhome/raeder/Cam3/cam3.5/models/atm/cam/bld/FV1.9x2.5-O3
+# NOTE: the namelistin section is commented out; uncomment it if you want namelistin from CAM_src
set CAM_phis = $CAM_src/cam_phis.nc
set num_lons = 144
set num_lats = 96
@@ -421,8 +451,8 @@
echo "There are ${num_ens} ensemble members."
# blueice requires the file to exist in order to append to it
-# echo "There are ${num_ens} ensemble members." >> $MASTERLOG
-echo "There are ${num_ens} ensemble members." > $MASTERLOG
+touch $MASTERLOG
+echo "There are ${num_ens} ensemble members." >> $MASTERLOG
#----------------------------------------------------------
# Figure out CAMs domain decomposition and usable number of processors,
@@ -503,16 +533,15 @@
if (! -e advance_model.csh) then
${COPY} ${DARTCAMDIR}/shell_scripts/advance_model.csh .
${COPY} ${DARTCAMDIR}/shell_scripts/run-cam.csh .
- ${COPY} ${DARTCAMDIR}/shell_scripts/auto_re2ms*.csh .
- ${COPY} ${DARTCAMDIR}/shell_scripts/diags.csh .
- ${COPY} ${DARTCAMDIR}/shell_scripts/auto_diag2ms_LSF.csh .
+ ${COPY} ${DARTCAMDIR}/full_experiment/auto_re2ms*.csh .
+ ${COPY} ${DARTCAMDIR}/full_experiment/diags.csh .
+ ${COPY} ${DARTCAMDIR}/full_experiment/auto_diag2ms_LSF.csh .
endif
set days_in_mo = (31 28 31 30 31 30 31 31 30 31 30 31)
-# leap years (but year not defined here);
-# if (($year % 4) == 0) @ days_in_mo[2] = $days_in_mo[2] + 1
-# leap year every 4 years except for century marks, but include centuries divisible by 400
+# Leap years occur every 4 years except at century marks, but centuries divisible by 400 are included.
# So, all modern years divisible by 4 are leap years.
+if (($year % 4) == 0) @ days_in_mo[2] = $days_in_mo[2] + 1
#----------------------------------------------------------
echo "exp num_ens obs_seq_1 obs_seq_n obs_seq_first"
@@ -524,7 +553,6 @@
echo "DART_ics_1 is $DART_ics_1" >> $MASTERLOG
# clean up old CAM inputs that may be laying around
-
if ( $obs_seq_1_depend == false ) then
if (-e caminput_1.nc) then
${REMOVE} clminput_[1-9]*.nc
@@ -536,8 +564,8 @@
if (-e $CAM_phis) then
${COPY} $CAM_phis cam_phis.nc
else
- echo "ERROR ... need a topog file from CAM h0 history file." >> $MASTERLOG
- echo "ERROR ... need a topog file from CAM h0 history file."
+ echo "ERROR ... need a cam_phis file from CAM h0 history file." >> $MASTERLOG
+ echo "ERROR ... need a cam_phis file from CAM h0 history file."
exit 99
endif
endif
@@ -554,11 +582,11 @@
endif
# Subdirectory name root, where output from each obs_seq iteration will be kept.
-# obs_diag looks for obs_seq.final files in directories of the form xx_##[#].
-# where xx_ = output_root signifies the month OF THE OBS_SEQ_FIRST.
-# and ##[#]# signifies the 2+ digit obs_seq number within this experiment.
-set output_root = ${mo_first}_
-if (${mo_first} < 10) set output_root = 0${output_root}
+# obs_diag looks for obs_seq.final files in directories of the form xx_####.
+# where xx_ = output_root is now always 'obs' (was $mo_first in earlier DARTs)
+# and #### signifies the 4 digit obs_seq number within this experiment.
+# (was 2 digit in earlier DARTs)
+set output_root = obs
#============================================================================================
# Have an overall outer loop over obs_seq.out files
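
With output_root fixed at 'obs', the per-obs_seq output directories built later in this diff via printf get 4-digit sequence numbers. A small sketch of the resulting names (the experiment name is a placeholder):

   set exp         = Exp1                                    # placeholder experiment name
   set output_root = obs
   set i           = 7
   set output_dir  = `printf "%s_%04d" ${output_root} $i`    # -> obs_0007
   set out_full    = ${exp}/${output_dir}                    # -> Exp1/obs_0007
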
@@ -587,14 +615,20 @@
set n_procs = $num_procs
echo "#BSUB -q ${queue}" >> ${job_i}
echo "#BSUB -n ${n_procs}" >> ${job_i}
-# exclusive use of the nodes; still allows > process/node
+# Exclusive use of the nodes; still allows > 1 process/node
echo "#BSUB -x " >> ${job_i}
+ echo '#BSUB -R "span[ptile='$ptile']"' >> ${job_i}
+
+# Select subset of all possible computational nodes here
# echo '#BSUB -m "'$nodelist '"' >> ${job_i}
# if ($?nodelist) echo '#BSUB -m "'$nodelist '"' >> ${job_i}
- echo '#BSUB -R "span[ptile='$ptile']"' >> ${job_i}
-# all possible computational nodes; select your subset here
+# OR exclude misbehaving nodes.
+ if ($?exclude_nodes) then
+ foreach node ($exclude_nodes)
+ echo '#BSUB -R "select[hname != '$node']" ' >> ${job_i}
+ end
+ endif
-# add this to your other bsub lines
if ($i > $obs_seq_1 || ($i == $obs_seq_1 && $obs_seq_1_depend == true)) then
@ previousjobnumber = $i - 1
@@ -638,20 +672,43 @@
echo "##==================================================================" >> ${job_i}
endif
+ if ($parallel_cam == 'false' && $?LS_SUBCWD) then
+ # This environment variable tells how many processors on each node to use
+ # which will depend on the per-processor memory, the model's memory high-water mark,
+ # the ensemble size, and other things.
+ # The following numbers are for bluefire (IBM Power6 chip) with ~2 GB memory/processor
+ # and 32 processors/node.
+ if ($num_procs == 96) then
+ # want 80 members = 1*28 + 2*26
+ echo "setenv LSB_PJL_TASK_GEOMETRY \" >> ${job_i}
+ echo ' "{(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27)\ ' >> ${job_i}
+ echo " (28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53)\ " >> ${job_i}
+ echo ' (54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79)}" ' >> ${job_i}
+ else if ($num_procs == 32) then
+ # I want 20 = 1*20
+ echo "setenv LSB_PJL_TASK_GEOMETRY \" >> ${job_i}
+ echo ' "{(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19)}"' >> ${job_i}
+ else
+ echo "parallel_cam is false, but num_procs is not 96 or 48 or 32" >> $MASTERLOG
+ exit
+ endif
+
+ endif
+
echo "set myname = "'$0'" # this is the name of this script" >> ${job_i}
echo "set CENTRALDIR = ${CENTRALDIR} " >> ${job_i}
echo "cd ${CENTRALDIR}" >> ${job_i}
- echo "set MASTERLOG = ${CENTRALDIR}/run_job.log" >> ${job_i}
+ echo "set MASTERLOG = ${MASTERLOG} " >> ${job_i}
echo 'set start_time = `date +%s`' >> ${job_i}
echo ' echo "host is " `hostname` ' >> ${job_i}
+ echo 'touch $MASTERLOG ' >> ${job_i}
#===================================================================================
# Construct directory name of location of restart files
@ j = $i - 1
- set out_prev = ${output_root}
- if ($j < 10) set out_prev = ${output_root}0
- set out_prev = ${out_prev}$j
+ set out_prev = `printf "%s_%04d" ${output_root} $j`
+ set out_prev = ${exp}/$out_prev
#-----------------------
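
The task-geometry branch above only writes echo output into the generated job script. Reconstructed from those echoes, the line that ends up in ${job_i} for the num_procs == 32 case is shown below; packing the 20 tasks into a single group places them all on one 32-way node, leaving the remaining slots idle, presumably for memory headroom:

   setenv LSB_PJL_TASK_GEOMETRY "{(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19)}"
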
@@ -713,8 +770,9 @@
else if ($obs_seq_freq > 0) then
@ month = $mo
- while ($month > $mo_first)
+ while ($month != $mo_first)
@ month = $month - 1
+ if ($month == 0) @ month = $month + 12
@ seq = $seq - $days_in_mo[$month] * $obs_seq_freq
end
@ month = $mo
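
The new wrap-around line lets the current month mo be earlier in the calendar than mo_first, i.e. the batch has crossed into a new year. A standalone sketch of the walk-back with mo_first = 7 and mo = 1 (it visits months 12, 11, ..., 7 after wrapping through December):

   set days_in_mo   = (31 28 31 30 31 30 31 31 30 31 30 31)
   set obs_seq_freq = 1
   set mo_first = 7
   set mo       = 1
   @ month = $mo
   while ($month != $mo_first)
      @ month = $month - 1
      if ($month == 0) @ month = $month + 12
      echo "subtract $days_in_mo[$month] * $obs_seq_freq obs_seq files for month $month"
   end
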
@@ -769,9 +827,9 @@
set clm_init = ${CLM_ics_1}
else
# Get initial files from result of previous experiment.
- set from_root = `pwd`/$exp/${out_prev}/DART
- set cam_init = `pwd`/$exp/${out_prev}/CAM/caminput_
- set clm_init = `pwd`/$exp/${out_prev}/CLM/clminput_
+ set from_root = `pwd`/${out_prev}/DART
+ set cam_init = `pwd`/${out_prev}/CAM/caminput_
+ set clm_init = `pwd`/${out_prev}/CLM/clminput_
endif
# transmit info to advance_model.csh
@@ -783,27 +841,28 @@
echo "$parallel_cam" >> casemodel.$i
echo "$run_command" >> casemodel.$i
# Only write the 7th record if it's FV and run-cam.csh needs the decomposition info
- if ($keep_lev_blocks > -1) then
+ # if ($keep_lev_blocks > -1) then
+ if ($keep_lev_blocks > 0) then
echo "$num_procs $keep_lev_blocks $keep_lat_blocks " >> casemodel.$i
endif
# advance_model wants to see a file 'casemodel' and not keep track of which obs_seq it's for
- echo "$REMOVE casemodel" >> ${job_i}
- echo "if (-e casemodel.$i) then " >> ${job_i}
- echo " $LINK casemodel.$i casemodel " >> ${job_i}
- echo "else " >> ${job_i}
- echo ' echo "casemodel.$i not found; exiting" >> $MASTERLOG' >> ${job_i}
- echo ' echo "casemodel.$i not found; exiting" ' >> ${job_i}
- echo " exit 124 " >> ${job_i}
- echo "endif " >> ${job_i}
+ echo "$REMOVE casemodel" >> ${job_i}
+ echo "if (-e casemodel.$i) then " >> ${job_i}
+ echo " $LINK casemodel.$i casemodel " >> ${job_i}
+ echo "else " >> ${job_i}
+ echo ' echo "job '$i'; casemodel.$i not found; exiting" >> $MASTERLOG' >> ${job_i}
+ echo ' echo "casemodel.$i not found; exiting" ' >> ${job_i}
+ echo " exit 124 " >> ${job_i}
+ echo "endif " >> ${job_i}
# adaptive inflation ic files may (not) exist
# Should query input.nml to learn whether to get them?
- echo " " >> ${job_i}
- echo "${REMOVE} *_inf_ic* " >> ${job_i}
- echo "if (-e ${from_root}/prior_inf_ic) \" >> ${job_i}
- echo " ${LINK} ${from_root}/prior_inf_ic prior_inf_ic_old " >> ${job_i}
- echo "if (-e ${from_root}/post_inf_ic) \" >> ${job_i}
- echo " ${LINK} ${from_root}/post_inf_ic post_inf_ic_old " >> ${job_i}
+ echo " " >> ${job_i}
+ echo "${REMOVE} *_inf_ic* " >> ${job_i}
+ echo "if (-e ${from_root}/prior_inf_ic) \" >> ${job_i}
+ echo " ${LINK} ${from_root}/prior_inf_ic prior_inf_ic_old " >> ${job_i}
+ echo "if (-e ${from_root}/post_inf_ic) \" >> ${job_i}
+ echo " ${LINK} ${from_root}/post_inf_ic post_inf_ic_old " >> ${job_i}
#? MPI too?
# link to filter_ic file(s), so that filter can copy them to a compute node
@@ -832,24 +891,57 @@
#-----------------------------------------------------------------------------
echo " " >> ${job_i}
echo "${REMOVE} caminput.nc clminput.nc " >> ${job_i}
- echo "${LINK} ${CAM_src}/caminput.nc caminput.nc" >> ${job_i}
- echo "${LINK} ${CAM_src}/clminput.nc clminput.nc" >> ${job_i}
+ if (-e ${CAM_src}/caminput.nc) then
+ echo "${LINK} ${CAM_src}/caminput.nc caminput.nc" >> ${job_i}
+ echo "${LINK} ${CAM_src}/clminput.nc clminput.nc" >> ${job_i}
+ else
+ echo "${CAM_src}/caminput.nc is missing; exiting job_mpi.csh"
+ exit
+ endif
+
#-----------------------------------------------------------------------------
# get name of file containing PHIS from the CAM namelist. This will be used by
# static_init_model to read in the PHIS field, which is used for height obs.
#-----------------------------------------------------------------------------
- if (obs_seq_1_depend == false) then
+ # Commented out to use real SSTs (in CAM_src)
+ if ($obs_seq_1 == $obs_seq_first) then
${REMOVE} namelistin
${LINK} ${CAM_src}/namelistin namelistin
sleep 1
endif
-
if (! -e namelistin ) then
echo "ERROR ... need a namelistin file." >> $MASTERLOG
echo "ERROR ... need a namelistin file."
exit 89
endif
+# Check contents of namelistin for proper CLM file output
+ set killit = false
+ grep restart_option namelistin | head -1 >! restart_option
+ if ($status != 0 ) then
+ set killit = true
+ else
+ set STRING = "1,$ s#'##g"
+ sed -e "$STRING" restart_option >! restart_string
+ set STRING = `cat restart_string`
+ if ($STRING[3] != nsteps) then
+ set killit = true
+ endif
+ endif
+ if ($killit == true) then
+ echo "namelistin:camcpl6_inparm must contain " >> $MASTERLOG
+ echo " restart_option = 'nsteps'" >> $MASTERLOG
+ echo " restart_n = # models steps in forecast" >> $MASTERLOG
+ echo "exiting" >> $MASTERLOG
+ echo "namelistin:camcpl6_inparm must contain "
+ echo " restart_option = 'nsteps'"
+ echo " restart_n = # models steps in forecast"
+ echo "exiting"
+ $KILLCOMMAND
+ endif
+
+ ${REMOVE} restart_[os]*
+
echo "if (! -e ${exp}/namelistin) ${COPY} namelistin ${exp}/namelistin " >> ${job_i}
@@ -873,53 +965,53 @@
echo 'while ( -e filter_to_model.lock ) ' >> ${job_i}
# read from the fifo file. this is *not* a busy wait; it puts the
# job to sleep in the kernel waiting for input.
- echo " " >> ${job_i}
- echo ' set todo = `( echo $< ) < filter_to_model.lock` ' >> ${job_i}
- echo ' echo todo received, value = ${todo} ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' if ( "${todo}" == "finished" ) then ' >> ${job_i}
- echo ' echo finished command received from filter. ' >> ${job_i}
- echo ' echo main script: filter done. ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' set todo = `( echo $< ) < filter_to_model.lock` ' >> ${job_i}
+ echo ' echo todo received, value = ${todo} ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' if ( "${todo}" == "finished" ) then ' >> ${job_i}
+ echo ' echo finished command received from filter. ' >> ${job_i}
+ echo ' echo main script: filter done. ' >> ${job_i}
# add this wait to be sure filter task has exited
# before starting to clean up the files.
- echo ' wait ' >> ${job_i}
- echo ' echo filter finished, removing pipes. ' >> ${job_i}
- echo " rm -f filter_to_model.lock model_to_filter.lock " >> ${job_i}
+ echo ' wait ' >> ${job_i}
+ echo ' echo filter finished, removing pipes. ' >> ${job_i}
+ echo " rm -f filter_to_model.lock model_to_filter.lock " >> ${job_i}
- echo ' break ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' else if ( "${todo}" == "advance" ) then ' >> ${job_i}
- echo ' echo advance command received from filter. ' >> ${job_i}
- echo ' echo calling advance_model.csh now: ' >> ${job_i}
- echo " ./advance_model.csh 0 $num_ens filter_control00000 ${parallel_cam}" >> ${job_i}
+ echo ' break ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' else if ( "${todo}" == "advance" ) then ' >> ${job_i}
+ echo ' echo advance command received from filter. ' >> ${job_i}
+ echo ' echo calling advance_model.csh now: ' >> ${job_i}
+ echo " ./advance_model.csh 0 $num_ens filter_control00000 ${parallel_cam}" >> ${job_i}
# do not execute anything here until you have saved
# the exit status from the advance model script.
- echo ' set advance_status = $status ' >> ${job_i}
- echo ' echo saved advance_model.csh exit status ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' echo restarting filter. this version of wakeup_filter ' >> ${job_i}
- echo ' echo includes restarting the main filter program. ' >> ${job_i}
- echo " ${run_command} ./wakeup_filter " >> ${job_i}
- echo " " >> ${job_i}
- echo ' if ($advance_status != 0) then ' >> ${job_i}
- echo ' echo "Model advance failed" ' >> ${job_i}
- echo ' rm -f filter_lock* ' >> ${job_i}
- echo ' break ' >> ${job_i}
- echo ' endif ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' else ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' echo main script: unexpected value received.' >> ${job_i}
- echo ' break ' >> ${job_i}
- echo " " >> ${job_i}
- echo ' endif ' >> ${job_i}
- echo " " >> ${job_i}
- echo 'end ' >> ${job_i}
- echo " " >> ${job_i}
+ echo ' set advance_status = $status ' >> ${job_i}
+ echo ' echo saved advance_model.csh exit status ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' echo restarting filter. this version of wakeup_filter ' >> ${job_i}
+ echo ' echo includes restarting the main filter program. ' >> ${job_i}
+ echo " ${run_command} ./wakeup_filter " >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' if ($advance_status != 0) then ' >> ${job_i}
+ echo ' echo "Model advance failed" ' >> ${job_i}
+ echo ' rm -f filter_lock* ' >> ${job_i}
+ echo ' break ' >> ${job_i}
+ echo ' endif ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' else ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' echo main script: unexpected value received.' >> ${job_i}
+ echo ' break ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo ' endif ' >> ${job_i}
+ echo " " >> ${job_i}
+ echo 'end ' >> ${job_i}
+ echo " " >> ${job_i}
else
# Run the filter in async=2 mode.
# runs filter, which tells the model to model advance and assimilates obs
- echo "${run_command} ./filter " >> ${job_i}
+ echo "${run_command} ./filter " >> ${job_i}
endif
set KILLCOMMAND = "touch BOMBED; exit"
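
Because the async-CAM branch above is written entirely as echo statements, its control flow is easier to read in the generated script. Slightly condensed and reconstructed from those echoes (variable names are kept for readability; the generated script has the literal values of num_ens, parallel_cam and run_command substituted in), the block placed into ${job_i} is roughly:

   while ( -e filter_to_model.lock )
      # blocking read from the fifo; the job sleeps until filter writes a command
      set todo = `( echo $< ) < filter_to_model.lock`
      echo todo received, value = ${todo}
      if ( "${todo}" == "finished" ) then
         wait                              # be sure the filter task has exited
         rm -f filter_to_model.lock model_to_filter.lock
         break
      else if ( "${todo}" == "advance" ) then
         ./advance_model.csh 0 $num_ens filter_control00000 $parallel_cam
         set advance_status = $status      # save this before running anything else
         ${run_command} ./wakeup_filter    # restarts the main filter program
         if ($advance_status != 0) then
            echo "Model advance failed"
            rm -f filter_lock*
            break
         endif
      else
         echo main script: unexpected value received.
         break
      endif
   end
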
@@ -956,19 +1048,18 @@
# All the CLM-related files will get put in ${exp}/${output_dir}/CLM
# All the DART-related files will get put in ${exp}/${output_dir}/DART
#-----------------------------------------------------------------------------
- set output_dir = ${output_root}
- if ($i < 10) set output_dir = ${output_root}0
- set output_dir = ${output_dir}$i
+ set output_dir = `printf "%s_%04d" ${output_root} $i`
+ set out_full = ${exp}/${output_dir}
echo " " >> ${job_i}
- echo "mkdir -p ${exp}/${output_dir}/{CAM,CLM,DART} " >> ${job_i}
+ echo "mkdir -p ${out_full}/{CAM,CLM,DART} " >> ${job_i}
echo " " >> ${job_i}
echo "foreach FILE ( Prior_Diag.nc Posterior_Diag.nc obs_seq.final )" >> ${job_i}
echo ' if ( -e $FILE && ! -z $FILE) then ' >> ${job_i}
- echo " ${MOVE} "'$FILE'" ${exp}/${output_dir} " >> ${job_i}
+ echo " ${MOVE} "'$FILE'" ${out_full} " >> ${job_i}
echo " if ( ! "'$status'" == 0 ) then " >> ${job_i}
- echo ' echo "failed moving ${CENTRALDIR}/$FILE" >> $MASTERLOG ' >> ${job_i}
+ echo ' echo "job '$i'; failed moving ${CENTRALDIR}/$FILE" >> $MASTERLOG ' >> ${job_i}
echo ' echo "failed moving ${CENTRALDIR}/$FILE" ' >> ${job_i}
echo " $KILLCOMMAND " >> ${job_i}
echo " endif " >> ${job_i}
@@ -984,31 +1075,31 @@
# so don't die if it's missing. We'd have to query input.nml to learn if it should exist.
echo " " >> ${job_i}
- echo "foreach FILE ( prior_inf_diag post_inf_diag ) " >> ${job_i}
- echo ' if ( -e ${FILE} && ! -z $FILE) then ' >> ${job_i}
- echo " ${MOVE} "'${FILE}'" ${exp}/${output_dir} " >> ${job_i}
- echo ' if ( ! $status == 0 ) then ' >> ${job_i}
- echo ' echo "failed moving ${CENTRALDIR}/${FILE} " >> $MASTERLOG ' >> ${job_i}
- echo ' echo "failed moving ${CENTRALDIR}/${FILE} " ' >> ${job_i}
- echo " $KILLCOMMAND " >> ${job_i}
- echo " endif " >> ${job_i}
- echo " endif " >> ${job_i}
- echo "end " >> ${job_i}
+ echo "foreach FILE ( prior_inf_diag post_inf_diag ) " >> ${job_i}
+ echo ' if ( -e ${FILE} && ! -z $FILE) then ' >> ${job_i}
+ echo " ${MOVE} "'${FILE}'" ${out_full} " >> ${job_i}
+ echo ' if ( ! $status == 0 ) then ' >> ${job_i}
+ echo ' echo "job '$i'; failed moving ${CENTRALDIR}/${FILE} " >> $MASTERLOG ' >> ${job_i}
+ echo ' echo "failed moving ${CENTRALDIR}/${FILE} " ' >> ${job_i}
+ echo " $KILLCOMMAND " >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo "end " >> ${job_i}
# Move the filter restart file(s) to the storage subdirectory
echo " " >> ${job_i}
- echo 'echo "moving filter_ic_newS to '${exp}/${output_dir}'/DART/filter_icS" ' >> ${job_i}
+ echo 'echo "moving filter_ic_newS to '${out_full}'/DART/filter_icS" ' >> ${job_i}
echo "if (-e filter_ic_new) then " >> ${job_i}
- echo " ${MOVE} filter_ic_new ${exp}/${output_dir}/DART/filter_ic " >> ${job_i}
+ echo " ${MOVE} filter_ic_new ${out_full}/DART/filter_ic " >> ${job_i}
echo " if (! "'$status'" == 0 ) then " >> ${job_i}
- echo ' echo "failed moving filter_ic_new to '${exp}/${output_dir}'/DART/filter_ic" ' >> ${job_i}
+ echo ' echo "failed moving filter_ic_new to '${out_full}'/DART/filter_ic" ' >> ${job_i}
echo " $KILLCOMMAND " >> ${job_i}
echo " endif " >> ${job_i}
echo "else if (-e filter_ic_new.0001) then " >> ${job_i}
echo " set n = 1 " >> ${job_i}
echo " while("'$n'" <= ${num_ens}) " >> ${job_i}
echo ' set from = filter_ic_new*[.0]$n' >> ${job_i}
- echo " set dest = ${exp}/${output_dir}/DART/filter_ic."'$from:e' >> ${job_i}
+ echo " set dest = ${out_full}/DART/filter_ic."'$from:e' >> ${job_i}
# stuff analyses into CAM initial files using
# echo " " >> ${job_i}
@@ -1038,9 +1129,9 @@
echo " " >> ${job_i}
echo "foreach FILE (prior_inf_ic post_inf_ic) " >> ${job_i}
echo ' if (-e ${FILE}_new ) then ' >> ${job_i}
- echo " ${MOVE} "'${FILE}_new'" ${exp}/${output_dir}/DART/"'${FILE} ' >> ${job_i}
+ echo " ${MOVE} "'${FILE}_new'" ${out_full}/DART/"'${FILE} ' >> ${job_i}
echo ' if (! $status == 0 ) then ' >> ${job_i}
- echo ' echo "failed moving ${FILE}_new to '${exp}/${output_dir}/DART/'${FILE}s "' >> ${job_i}
+ echo ' echo "failed moving ${FILE}_new to '${out_full}/DART/'${FILE}s "' >> ${job_i}
echo " $KILLCOMMAND " >> ${job_i}
echo " endif " >> ${job_i}
echo " endif " >> ${job_i}
@@ -1054,7 +1145,7 @@
echo " " >> ${job_i}
echo ' if ( -e $CAMINPUT && ! -z $CAMINPUT) then ' >> ${job_i}
- echo " ${MOVE} "'$CAMINPUT'" ${exp}/${output_dir}/CAM " >> ${job_i}
+ echo " ${MOVE} "'$CAMINPUT'" ${out_full}/CAM " >> ${job_i}
echo ' if (! $status == 0 ) then ' >> ${job_i}
echo ' echo "failed moving ${CENTRALDIR}/$CAMINPUT " ' >> ${job_i}
echo " $KILLCOMMAND " >> ${job_i}
@@ -1068,7 +1159,7 @@
echo " " >> ${job_i}
echo ' if ( -e $CLMINPUT && ! -z $CLMINPUT) then ' >> ${job_i}
- echo " ${MOVE} "'$CLMINPUT'" ${exp}/${output_dir}/CLM " >> ${job_i}
+ echo " ${MOVE} "'$CLMINPUT'" ${out_full}/CLM " >> ${job_i}
echo ' if (! $status == 0 ) then ' >> ${job_i}
echo ' echo "failed moving ${CENTRALDIR}/$CLMINPUT " ' >> ${job_i}
echo " $KILLCOMMAND " >> ${job_i}
@@ -1084,13 +1175,43 @@
echo " @ n++ " >> ${job_i}
echo "end " >> ${job_i}
- echo "if (! -e times && ! -e ${exp}/${output_dir}/CAM/caminput_1.nc) then " >> ${job_i}
+# save the CLM initial files from intermediate times, for analyses in CAM initial file format
+ echo " " >> ${job_i}
+ echo 'foreach clm (`ls clm_init_memb*.nc`) ' >> ${job_i}
+ echo ' if (! -z $clm) then ' >> ${job_i}
+ echo " ${MOVE} "'$clm'" ${out_full}" >> ${job_i}
+ echo ' if (! $status == 0 ) then ' >> ${job_i}
+ echo ' echo "failed moving ${CENTRALDIR}/$clm " ' >> ${job_i}
+ echo " $KILLCOMMAND " >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo " else " >> ${job_i}
+ echo ' echo "failed moving ${CENTRALDIR}/$clm because of size 0" ' >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo "end" >> ${job_i}
+ echo " " >> ${job_i}
+# save the CAM initial files from intermediate times, for analyses in CAM initial file format
+ echo " " >> ${job_i}
+ echo 'foreach cam (`ls cam_init_memb*.nc`) ' >> ${job_i}
+ echo ' if (! -z $cam) then ' >> ${job_i}
+ echo " ${MOVE} "'$cam'" ${out_full}" >> ${job_i}
+ echo ' if (! $status == 0 ) then ' >> ${job_i}
+ echo ' echo "failed moving ${CENTRALDIR}/$cam " ' >> ${job_i}
+ echo " $KILLCOMMAND " >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo " else " >> ${job_i}
+ echo ' echo "failed moving ${CENTRALDIR}/$cam because of size 0" ' >> ${job_i}
+ echo " endif " >> ${job_i}
+ echo "end" >> ${job_i}
+ echo " " >> ${job_i}
+
+
+ echo "if (! -e times && ! -e ${out_full}/CAM/caminput_1.nc) then " >> ${job_i}
echo " # There may have been no advance; " >> ${job_i}
echo " # use the CAM_ics_1 as the CAM ics for this time " >> ${job_i}
echo " set ens = 1 " >> ${job_i}
echo ' while ($ens <= '$num_ens" ) " >> ${job_i}
- echo " cp ${CAM_ics_1}"'${ens}'".nc ${exp}/${output_dir}/CAM " >> ${job_i}
- echo " cp ${CLM_ics_1}"'${ens}'".nc ${exp}/${output_dir}/CLM " >> ${job_i}
+ echo " cp ${CAM_ics_1}"'${ens}'".nc ${out_full}/CAM " >> ${job_i}
+ echo " cp ${CLM_ics_1}"'${ens}'".nc ${out_full}/CLM " >> ${job_i}
echo " @ ens++ " >> ${job_i}
echo " end " >> ${job_i}
echo "endif " >> ${job_i}
@@ -1102,13 +1223,13 @@
echo " ! -z $exp/${output_dir}/DART/filter_ic ) || \" >> ${job_i}
echo " ( -e $exp/${output_dir}/DART/filter_ic.*${num_ens} && \" >> ${job_i}
echo " ! -z $exp/${output_dir}/DART/filter_ic.*${num_ens}) ) then " >> ${job_i}
- echo ' echo "it is OK to proceed with next obs_seq at "`date` >> $MASTERLOG ' >> ${job_i}
+ echo ' echo "job '$i'; it is OK to proceed with next obs_seq at "`date` >> $MASTERLOG ' >> ${job_i}
echo ' echo "it is OK to proceed with next obs_seq at "`date` ' >> ${job_i}
echo 'else ' >> ${job_i}
echo ' echo "RETRIEVE filter_ic files from filter temp directory ?" ' >> ${job_i}
echo ' echo "Then remove temp and cam advance temps" ' >> ${job_i}
- echo ' echo "RETRIEVE filter_ic files from filter temp directory ?" >> $MASTERLOG ' >> ${job_i}
- echo ' echo "Then remove temp and cam advance temps" >> $MASTERLOG ' >> ${job_i}
+ echo ' echo "job '$i'; RETRIEVE filter_ic files from filter temp directory ?" >> $MASTERLOG ' >> ${job_i}
@@ Diff output truncated at 40000 characters. @@