[Wrf-users] Runtime Problem running WRF2.1.2 on multi processors

Srikishen, Jayanthi Jayanthi.Srikishen at msfc.nasa.gov
Thu Sep 14 15:08:05 MDT 2006


Hi  Wrf-users !
 
 
I'm trying to run the WRF model on a Linux cluster (Intel processors).
I successfully built real.exe and wrf.exe.
 
./real.exe worked fine
 
mpirun -np 4 ./wrf.exe runs for 2 time steps and then aborts.  fort.98
contains "OUT OF BOUNDS" messages and NaNs.
 
I've tried with mpirun -np 1 ./wrf.exe and it works FINE.
 
Could you tell me what is causing the problem?
 
The compiler switches and other system-related information are given
below.  Also included are the rsl.out.0000 output and a few lines of the
fort.98 output.
 
************************************************************************
************************************************************************
**********
setenv MP_STACK_SIZE 64000000
setenv WRF_EM_CORE 1
limit stacksize unlimited
 
mpif90 -V  =   pgf90 5.2-2
netcdf = netcdf-3.6.1_GCC_PGI5.2    (portland group compiler-fortran)
mpich = mpich2-1.0.3_GCC_PGI5.2
mpich =  mpich-1.2.7p1_GCC_PGI5.2 (tried with this version also)
 
WRF = WRFV2.1.2
wrfsi = wrfsi_v2.1.2
uname -rs = Linux 2.6.9-34.0.1.ELsmp
 
#### Architecture specific settings ####
 
# Settings for PC Linux i486 i586 i686, PGI compiler  DM-Parallel (RSL, MPICH, Allows nesting)
#
# Notes: for experimental implementation of moving nests, add -DMOVE_NESTS to ARCHFLAGS
#        for experimental implementation of vortex tracking nests, add -DMOVE_NESTS -DVORTEX_CENTER to ARCHFLAGS
#
 
DMPARALLEL      =       1
MAX_PROC        =       1024
FC              =       /rstor17/sriki/mpich-1.2.7p1/bin/mpif90
-f90=pgf90 -Bstatic (with and without static option)
LD              =       /rstor17/sriki/mpich-1.2.7p1/bin/mpif90
-f90=pgf90 -Bstatic (with and without static option)
CC              =       /rstor17/sriki/mpich-1.2.7p1/bin/mpicc -cc=gcc
-static -DMPI2_SUPPORT
 -DFSEEKO64_OK
SCC             =       gcc
SFC             =       pgf90
RWORDSIZE       =       $(NATIVE_RWORDSIZE)
PROMOTION       =       -r$(RWORDSIZE) -i4
CFLAGS          =       -DDM_PARALLEL -DWRF_RSL_IO \
                        -DMAXDOM_MAKE=$(MAX_DOMAINS)
-DMAXPROC_MAKE=$(MAX_PROC) -I../external
/RSL/RSL \
                        -I/rstor17/sriki/mpich-1.2.7p1/include
FCOPTIM         =       -O2 # -fast # ALSO TRIED WITH -O0
FCDEBUG         =       #-g
#FCBASEOPTS      =       -w -byteswapio -Ktrap=fp -Mfree -tp p6
$(FCDEBUG)
FCBASEOPTS      =       -w -byteswapio -Mfree -tp p6 $(FCDEBUG) # -Mlfs
FCFLAGS         =       $(FCOPTIM) $(FCBASEOPTS)
ARCHFLAGS       =       -DDEREF_KLUDGE -DIO_DEREF_KLUDGE -DGRIB1 -DINTIO
-DWRF_RSL_IO -DRSL -
DDM_PARALLEL \
-DIWORDSIZE=4 -DDWORDSIZE=8 -DRWORDSIZE=$(RWORDSIZE) -DLWORDSIZE=4 -D
NETCDF \
                        -DTRIEDNTRUE   \
                        -DLIMIT_ARGS
INCLUDE_MODULES =       -module ../main -I../external/io_netcdf
-I../external/io_int -I../ext
ernal/esmf_time_f90 \
                        -I../external -I../frame -I../share -I../phys
-I../chem -I../inc \
                        /rstor17/sriki/mpich-1.2.7p1/include
PERL            =       perl
REGISTRY        =       Registry
LIB             =       -L../external/io_netcdf -lwrfio_nf
-L/usr/local/netcdf/lib -lnetcdf -
L../external/RSL/RSL -lrsl \
                        -L../external/io_grib1 -lio_grib1 \
                        -L../external/io_int -lwrfio_int \
                         -L/rstor17/sriki//mpich-1.2.7p1/lib -lmpichf90
\
                        ../frame/module_internal_header_util.o
../frame/pack_utils.o -L../ext
ernal/esmf_time_f90 -lesmf_time
LDFLAGS         =       -byteswapio $(FCFLAGS)
ENVCOMPDEFS     =
WRF_CHEM        =       0
CPP             =       /lib/cpp -C -P -traditional
POUND_DEF       =       -DNO_RRTM_PHYSICS  -traditional $(COREDEFS)
-DNONSTANDARD_SYSTEM -DF9
0_STANDALONE -DCONFIG_BUF_LEN=$(CONFIG_BUF_LEN)
-DMAX_DOMAINS_F=$(MAX_DOMAINS)
CPPFLAGS        =       -I$(LIBINCLUDE) -C -P $(ARCHFLAGS)
-I../external/RSL/RSL -C -P `cat .
./inc/dm_comm_cpp_flags` $(ENVCOMPDEFS) $(POUND_DEF)
AR              =       ar ru
M4              =       m4
RANLIB          =       ranlib
NETCDFPATH      =       /usr/local/netcdf
CC_TOOLS        =       cc
************************************************************************
*********************
********************
mpirun -np 4 ./wrf.exe
 
The program aborts with the following message:
rm_l_2_19671: (28.535918) net_send: could not write to fd=5, errno = 32

tail rsl.out.0000 
 
  STEPRA,STEPCU,STEPBL            7            3            1
Timing for Writing wrfout_d01_2005-11-20_06:00:00 for domain        1:    4.25400 elapsed seconds.
Timing for processing lateral boundary for domain        1:    0.84300
elapsed seconds.
 WRF NUMBER OF TILES =   1
Timing for main: time 2005-11-20_06:01:30 on domain   1:   16.37400
elapsed seconds.
Timing for main: time 2005-11-20_06:03:00 on domain   1:    5.29800
elapsed seconds.
 
 
more fort.98
 

 **** OUT OF BOUNDS *********
 **** OUT OF BOUNDS *********
 **** OUT OF BOUNDS *********
 **** OUT OF BOUNDS *********
  LFS,LDB,LDT = 26 24 24 TIMEC, TADVEC, NSTEP= 3600. 4308.  2NCOUNT,
FABE, AINC= 1 1.000   na
n
 
 P(LC), DTP, WKL, WKLCL =    590.3015                -nan
-1.8851364E-02
   2.0000000E-02
 TLCL, DTLCL, DTRH, TENV =    266.8293       0.0000000       0.0000000
            -nan
      KLCL=25 ZLCL= 5248.4M DTLCL= 0.00 LTOP=36 P0(LTOP)=122.8MB FRZ LV=
0 TMIX=-0.7 PMIX= 57
7.0 QMIX=  4.5 CAPE=    nan
  P0(LET) =  122.8 P0(LTOP) =  122.8 VMFLCL =        -nan PLCL =  -nan
WLCL = 1.000 CLDHGT = 
10237.9
 PEF(WS)=0.90(CB)=0.31LC,LET= 23 36WKL=-0.019VWS= 0.66
 PRECIP EFFICIENCY =             nan
  LFS,LDB,LDT = 26 24 24 TIMEC, TADVEC, NSTEP= 3600. 4308.  2NCOUNT,
FABE, AINC= 1 1.000   na
n
     P       DP  DT K/D  DR K/D    OMG   DOMGDP    UMF     UER     UDR
DMF     DER     DD
R     EMS      W0    DETLQ   DETIC
 just before DO 300...
  122.76   45.42  -17.29    -nan     nan     nan    0.00   0.000    -nan
0.00   0.000   0.
000   6.000   0.852     nan     nan
  168.13   45.42   26.98    -nan     nan     nan    -nan    -nan    -nan
0.00   0.000   0.
000   6.000   1.578     nan     nan
 
************************************************************************
*********************
****
 
Thanks
Jayanthi



-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mailman.ucar.edu/pipermail/wrf-users/attachments/20060914/8a5d3c52/attachment.html


More information about the Wrf-users mailing list