[Dart-dev] [4363] DART/trunk/models/template/shell_scripts/run_filter.csh: Make it harder to do this wrong, by getting the async= value from
nancy at ucar.edu
nancy at ucar.edu
Thu May 13 13:19:47 MDT 2010
Revision: 4363
Author: nancy
Date: 2010-05-13 13:19:47 -0600 (Thu, 13 May 2010)
Log Message:
-----------
Make it harder to do this wrong by getting the async= value from
input.nml and setting the parallel_model shell variable automatically.
Also set the advance command based on what's in the namelist.
Minor updates to the comments to remove names of obsolete machines
and add a comment about the -W bsub line.
Modified Paths:
--------------
DART/trunk/models/template/shell_scripts/run_filter.csh
-------------- next part --------------
Modified: DART/trunk/models/template/shell_scripts/run_filter.csh
===================================================================
--- DART/trunk/models/template/shell_scripts/run_filter.csh 2010-05-13 18:56:06 UTC (rev 4362)
+++ DART/trunk/models/template/shell_scripts/run_filter.csh 2010-05-13 19:19:47 UTC (rev 4363)
@@ -6,30 +6,33 @@
#
# $Id$
#
+# Script to start an MPI version of filter, and then optionally
+# run the model advance if &filter_nml has async=4 (parallel filter
+# AND parallel model). This version gets the number of ensemble members
+# and advance command out of the input.nml namelist file automatically.
+# You do have to set the queuing-system directives below to suit your system.
+#
#=============================================================================
# This block of directives constitutes the preamble for the LSF queuing system
-# LSF is used on the IBM Linux cluster 'lightning'
# LSF is used on the IMAGe Linux cluster 'coral'
-# LSF is used on the IBM 'bluevista'
-# The queues on lightning and bluevista are supposed to be similar.
+# LSF is used on the IBM 'bluefire'
#
# the normal way to submit to the queue is: bsub < run_filter.csh
#
# an explanation of the most common directives follows:
-# -J Job name (master script job.csh presumes filter_server.xxxx.log)
-# -o STDOUT filename
-# -e STDERR filename
-# -P account
+# -J Job_name
+# -o STDOUT_filename
+# -e STDERR_filename
+# -P account_code_number
# -q queue cheapest == [standby, economy, (regular,debug), premium] == $$$$
-# -n number of processors (really)
+# -n number of MPI processes (not nodes)
+# -W hh:mm wallclock time (required on some systems)
##=============================================================================
#BSUB -J filter
#BSUB -o filter.%J.log
-#BSUB -q regular
+#BSUB -q standby
#BSUB -n 20
-#BXXX -P 868500xx
-#BSUB -W 2:00
-#BSUB -N -u ${USER}@ucar.edu
+#BSUB -W 1:00
#
##=============================================================================
## This block of directives constitutes the preamble for the PBS queuing system
@@ -64,25 +67,40 @@
# mpirun -np 64 modelxxx (or whatever) for as many ensembles as you have,
# set this to "true"
-# if async=4, also check that the call to advance_model.csh
-# has the right number of ensemble members below; it must match
-# the input.nml number.
+# this script is going to determine several things by reading the input.nml
+# file which contains the &filter_nml namelist. make sure it exists first.
+if ( ! -e input.nml ) then
+ echo "ERROR - input.nml does not exist in local directory."
+ echo "ERROR - input.nml needed to determine several settings for this script."
+ exit 1
+endif
+# detect whether the model is supposed to run as an MPI job or not
+# by reading the "async = " from the &filter_nml namelist in input.nml.
+# other namelists may contain the same string, so be sure to get the
+# filter_nml one by grepping only the lines which follow "filter_nml".
+
+set ASYNCSTRING = `grep -A 42 filter_nml input.nml | grep async`
+set ASYNC_TYPE = `echo $ASYNCSTRING[3] | sed -e "s#,##"`
+
+if ( "${ASYNC_TYPE}" == "0" || "${ASYNC_TYPE}" == "2") then
+ set parallel_model = "false"
+else if ( "${ASYNC_TYPE}" == "4") then
set parallel_model = "true"
+else
+ echo 'cannot autodetect async value in the filter_nml namelist in input.nml file.'
+ echo 'hardcode the parallel_model shell variable and comment out these lines.'
+ exit -1
+ set parallel_model = "false"
+endif
# Determine the number of ensemble members from input.nml,
-# it may exist in more than one place.
-# Parse out the filter_nml string and see which
-# one is immediately after it ...
+# as well as the command for advancing the model.
-if ( ! -e input.nml ) then
- echo "ERROR - input.nml does not exist in local directory."
- echo "ERROR - input.nml needed to determine number of ensemble members."
- exit 1
-endif
-
set ENSEMBLESTRING = `grep -A 42 filter_nml input.nml | grep ens_size`
set NUM_ENS = `echo $ENSEMBLESTRING[3] | sed -e "s#,##"`
+set ADVANCESTRING = `grep -A 42 filter_nml input.nml | grep adv_ens_command`
+set ADV_CMD = `echo $ADVANCESTRING[3] | sed -e "s#,##"`
# A common strategy for the beginning is to check for the existence of
# some variables that get set by the different queuing mechanisms.
@@ -93,7 +111,6 @@
if ($?LS_SUBCWD) then
# LSF has a list of processors already in a variable (LSB_HOSTS)
- # alias submit 'bsub < \!*'
echo "LSF - using mpirun.lsf for execution"
# each filter task advances the ensembles, each running on 1 proc.
@@ -137,7 +154,7 @@
# of processors this job is using.
echo "calling model advance now:"
- ./advance_model.csh 0 ${NUM_ENS} filter_control00000 || exit 9
+ ${ADV_CMD} 0 ${NUM_ENS} filter_control00000 || exit 9
echo "restarting filter."
mpirun.lsf ./wakeup_filter
@@ -161,7 +178,6 @@
else if ($?PBS_O_WORKDIR) then
# PBS has a list of processors in a file whose name is (PBS_NODEFILE)
- # alias submit 'qsub \!*'
echo "PBS - using mpirun for execution"
# each filter task advances the ensembles, each running on 1 proc.
@@ -205,7 +221,7 @@
# of processors this job is using.
echo "calling model advance now:"
- ./advance_model.csh 0 ${NUM_ENS} filter_control00000 || exit 9
+ ${ADV_CMD} 0 ${NUM_ENS} filter_control00000 || exit 9
echo "restarting filter."
mpirun ./wakeup_filter
@@ -288,7 +304,7 @@
# of processors this job is using.
echo "calling model advance now:"
- ./advance_model.csh 0 ${NUM_ENS} filter_control00000 || exit 9
+ ${ADV_CMD} 0 ${NUM_ENS} filter_control00000 || exit 9
echo "restarting filter."
${MPICMD} ./wakeup_filter
More information about the Dart-dev
mailing list