[Dart-dev] DART/branches Revision: 12151

dart at ucar.edu dart at ucar.edu
Wed Nov 29 15:34:01 MST 2017


thoar at ucar.edu
2017-11-29 15:33:59 -0700 (Wed, 29 Nov 2017)
420
This one demonstrates how to spread the ensemble over fewer tasks on each node.
So, if you need more memory per task, this is how to get it.
Also turned off generating most of the history files in an attempt to isolate 
why the writing of the history file is causing a run-time crash.
Still nothing to do with DART - just running multi-instance CESM.
The prime suspect is that there is no RH2M_R ... there is an RH2M, however.




Modified: DART/branches/cesm_clm/models/clm/shell_scripts/CESM2_0/CESM2_0_setup_ensemble
===================================================================
--- DART/branches/cesm_clm/models/clm/shell_scripts/CESM2_0/CESM2_0_setup_ensemble	2017-11-29 20:50:01 UTC (rev 12150)
+++ DART/branches/cesm_clm/models/clm/shell_scripts/CESM2_0/CESM2_0_setup_ensemble	2017-11-29 22:33:59 UTC (rev 12151)
@@ -23,7 +23,7 @@
 # num_instances The number of ensemble members.
 # ==============================================================================
 
-setenv CASE           clm5_startup
+setenv CASE           clm5_startup_2
 setenv resolution     f09_g16
 setenv compset        I2000Clm50BgcCrop
 setenv compset        2000_DATM%GSWP3v1_CLM50%BGC-CROP_SICE_SOCN_MOSART_SGLC_SWAV
@@ -79,7 +79,7 @@
 #      less than 10 minutes on yellowstone using 1800 pes (120 nodes)
 # ==============================================================================
 
-setenv queue        regular
+setenv queue        economy
 setenv timewall     0:20
 setenv short_term_archiver off
 
@@ -177,13 +177,13 @@
 # If any of these are changed by xmlchange calls in this program,
 # then they must be explicty changed with setenv calls too.
 #
-setenv TEST_MPI           `./xmlquery MPI_RUN_COMMAND    --value`
-setenv CLM_CONFIG_OPTS    `./xmlquery CLM_CONFIG_OPTS    --value`
-setenv COMPSET            `./xmlquery COMPSET            --value`
-setenv COMP_ATM           `./xmlquery COMP_ATM           --value`
-setenv CIMEROOT           `./xmlquery CIMEROOT           --value`
-setenv CASEROOT           `./xmlquery CASEROOT           --value`
-setenv MAX_TASKS_PER_NODE `./xmlquery MAX_TASKS_PER_NODE --value`
+setenv TEST_MPI              `./xmlquery MPI_RUN_COMMAND       --value`
+setenv CLM_CONFIG_OPTS       `./xmlquery CLM_CONFIG_OPTS       --value`
+setenv COMPSET               `./xmlquery COMPSET               --value`
+setenv COMP_ATM              `./xmlquery COMP_ATM              --value`
+setenv CIMEROOT              `./xmlquery CIMEROOT              --value`
+setenv CASEROOT              `./xmlquery CASEROOT              --value`
+setenv MAX_MPITASKS_PER_NODE `./xmlquery MAX_MPITASKS_PER_NODE --value`
 
 # Make sure the case is configured with a data ocean.
 
@@ -252,37 +252,49 @@
 ./xmlchange RUN_REFDATE=${refdate}
 ./xmlchange RUN_STARTDATE=${startdate}
 
+# pnetcdf is default
 ./xmlchange PIO_TYPENAME=netcdf
 
-# Data assimilation is turned off initially.
-# After things are working, it will be turned on when you
-# run CASEROOT/CESM_DART_config
+# MAX_MPITASKS_PER_NODE comes from $case/Tools/mkbatch.$machine
 
-@ nodes_per_instance = 1
-@ ptile = $MAX_TASKS_PER_NODE / 2
+@ ptile = $MAX_MPITASKS_PER_NODE / 2
+./xmlchange MAX_TASKS_PER_NODE=$ptile
+./xmlchange MAX_MPITASKS_PER_NODE=$ptile
+
 @ nthreads = 1
 
-@ atm_tasks = $ptile * $nodes_per_instance * $num_instances
-@ lnd_tasks = $ptile * $nodes_per_instance * $num_instances
-@ ice_tasks = $ptile * $nodes_per_instance
+echo "ptile is $ptile"
+
+# Task layout:
+# Set the nodes_per_instance below to match your case.
+# By computing task counts like we do below, we guarantee each instance uses
+# a whole number of nodes which is the recommended configuration.
+
+@ nodes_per_instance = 4
+@ ptile = -1
+
+@ atm_tasks = $ptile * $nodes_per_instance
+@ cpl_tasks = $ptile * $nodes_per_instance
 @ ocn_tasks = $ptile * $nodes_per_instance
-@ cpl_tasks = $ptile * $nodes_per_instance
 @ wav_tasks = $ptile * $nodes_per_instance
+@ glc_tasks = $ptile * $nodes_per_instance
+@ ice_tasks = $ptile * $nodes_per_instance
+@ rof_tasks = $ptile * $nodes_per_instance
+@ lnd_tasks = $ptile * $nodes_per_instance
 @ esp_tasks = $ptile * $nodes_per_instance
-@ glc_tasks = $ptile * $nodes_per_instance
-@ rof_tasks = $ptile * $nodes_per_instance * $num_instances
 
+./xmlchange ROOTPE_ATM=0,NTHRDS_ATM=$nthreads,NTASKS_ATM=$atm_tasks
 ./xmlchange ROOTPE_CPL=0,NTHRDS_CPL=$nthreads,NTASKS_CPL=$cpl_tasks
-./xmlchange ROOTPE_ICE=0,NTHRDS_ICE=$nthreads,NTASKS_ICE=$ice_tasks,NINST_ICE=1
-./xmlchange ROOTPE_ATM=0,NTHRDS_ATM=$nthreads,NTASKS_ATM=$atm_tasks,NINST_ATM=$num_instances
-./xmlchange ROOTPE_OCN=0,NTHRDS_OCN=$nthreads,NTASKS_OCN=$ocn_tasks,NINST_OCN=1
-./xmlchange ROOTPE_LND=0,NTHRDS_LND=$nthreads,NTASKS_LND=$lnd_tasks,NINST_LND=$num_instances
-./xmlchange ROOTPE_GLC=0,NTHRDS_GLC=$nthreads,NTASKS_GLC=$glc_tasks,NINST_GLC=1
-./xmlchange ROOTPE_ROF=0,NTHRDS_ROF=$nthreads,NTASKS_ROF=$rof_tasks,NINST_ROF=$num_instances
-./xmlchange ROOTPE_WAV=0,NTHRDS_WAV=$nthreads,NTASKS_WAV=$wav_tasks,NINST_WAV=1
-./xmlchange ROOTPE_ESP=0,NTHRDS_ESP=$nthreads,NTASKS_ESP=$esp_tasks,NINST_ESP=1
+./xmlchange ROOTPE_OCN=0,NTHRDS_OCN=$nthreads,NTASKS_OCN=$ocn_tasks
+./xmlchange ROOTPE_WAV=0,NTHRDS_WAV=$nthreads,NTASKS_WAV=$wav_tasks


More information about the Dart-dev mailing list