[Wrf-users] Hi WRF users

Kim, Chang-Ki 62beatle at yonsei.ac.kr
Thu Jan 25 04:34:36 MST 2007


Hi 

I'm trying to optimize WRF V2.1.2 on an Intel Woodcrest cluster (64-bit).
The model code compiled and installed without any error messages.
However, the average CPU usage across all nodes is only about 40%.

Does anyone know how to raise the CPU usage to about 80%?

PGI Fortran (pgf90) and MPICH are installed as the Fortran compiler and MPI implementation, respectively.

I have 16 CPUs.

My configure.wrf is as follows.

# configure.wrf
#
# This file was automatically generated by the configure script in the
# top level directory. You may make changes to the settings in this
# file but be aware they will be overwritten each time you run configure.
# Ordinarily, it is necessary to run configure once, when the code is
# first installed.
#
# To permanently change options, change the settings for your platform
# in the file arch/configure.defaults then rerun configure.
#
# Shell that make uses to run every recipe line.
SHELL           =       /bin/sh
# NOTE: make does not expand backquotes itself; DEVTOP holds the literal text
# `pwd`, which is only evaluated by the shell when DEVTOP is used in a recipe.
DEVTOP          =       `pwd`
# Passed to the preprocessor as an include directory (see CPPFLAGS).
LIBINCLUDE      =       .
# Suffixes known to the suffix rules (.F.i, .F.o, .F.f, .f.o, .c.o) in this file.
.SUFFIXES: .F .i .o .f .c

#### Per-core preprocessor switches.  The WRF_*_CORE values are not set in
#### this file; they come from the environment (set in the compile script).
#### Note: when adding a new core, add its -D switch to this list as well.

COREDEFS = \
    -DEM_CORE=$(WRF_EM_CORE) \
    -DNMM_CORE=$(WRF_NMM_CORE) \
    -DNMM_MAX_DIM=2600 \
    -DCOAMPS_CORE=$(WRF_COAMPS_CORE) \
    -DEXP_CORE=$(WRF_EXP_CORE)

#### Single place where the total number of domains is defined.  You need
#### at least 1 + 2*(number of total nests); e.g. 1 coarse grid plus three
#### fine grids gives 1 + 2(3) = 7, so MAX_DOMAINS=7.

MAX_DOMAINS    = 21

#### Length of the DM buffer that holds the configuration flags.

CONFIG_BUF_LEN = 8192


##############################################################################
#### The settings in this section are defaults that may be overridden by the 
#### architecture-specific settings in the next section.  
##############################################################################

##############################################################################
#### NOTE:  Do not modify these default values here.  To override these 
####        values, make changes after "Architecture specific settings".  
##############################################################################

#### Native size (in bytes) of the Fortran REAL data type on this architecture ####
#### Note:  to change the real word size (for example, to promote REALs from
####        4-byte to 8-byte), modify the definition of RWORDSIZE in the
####        section following "Architecture specific settings".  Do not
####        change NATIVE_RWORDSIZE, as it is architecture-specific.
NATIVE_RWORDSIZE    = 4

#### Default sed command and script applied to Fortran source files ####
SED_FTN             = sed -f ../arch/standard.sed

#### Default ESMF switch and dependency for the embedded ESMF time manager ####
ESMFCOUPLING        = 0
ESMF_MOD_DEPENDENCE = ../external/esmf_time_f90/module_utility.o

##############################################################################


#### Architecture specific settings ####

# Settings for PC Linux x86_64 (IA64 and Opteron), PGI 5.2 or higher, DM-Parallel  (RSL, MPICH, Allows nesting)
#
# Notes: for experimental implementation of moving nests, add -DMOVE_NESTS to ARCHFLAGS
#        for experimental implementation of vortex tracking nests, add -DMOVE_NESTS -DVORTEX_CENTER to ARCHFLAGS
#
#        Note that for 5.1.x comment out -Mipa=fast
#
# Distributed-memory build switches.  MAX_PROC is forwarded to the C code as
# -DMAXPROC_MAKE (see CFLAGS) and to the RSL library build.
DMPARALLEL      =       1
MAX_PROC        =       1024

# Compilers.  FC, LD and CC are the MPI compiler wrappers.  SFC is the plain
# Fortran compiler handed to the io_int and io_grib1 sub-builds; SCC is the
# plain C compiler handed to the io_grib1 sub-build.
FC              =       mpif90
LD              =       mpif90
CC              =       mpicc -DMPI2_SUPPORT -DFSEEKO64_OK
SCC             =       gcc
SFC             =       pgf90

# REAL word size used for this build and the matching promotion flags.
RWORDSIZE       =       $(NATIVE_RWORDSIZE)
PROMOTION       =       -r$(RWORDSIZE) -i4

CFLAGS          =       -DDM_PARALLEL -DWRF_RSL_IO \
                        -DMAXDOM_MAKE=$(MAX_DOMAINS) -DMAXPROC_MAKE=$(MAX_PROC) -I../external/RSL/RSL \
                        -DLANDREAD_STUB

# Fortran flags.  FCOPTIM is only part of FCFLAGS; the rule for the objects
# compiled "without high optimization" passes FCBASEOPTS alone.
# Optional extra optimization, left disabled: -Mipa=fast
FCOPTIM         =       -fastsse
# Debugging is off (empty value); the disabled setting was -g.
FCDEBUG         =
#FCBASEOPTS      =       -w -byteswapio -Ktrap=fp -Mfree $(FCDEBUG)
FCBASEOPTS      =       -w -byteswapio -Mfree $(FCDEBUG)
FCFLAGS         =       $(FCOPTIM) $(FCBASEOPTS)

ARCHFLAGS       =       -DDEREF_KLUDGE -DIO_DEREF_KLUDGE -DGRIB1 -DINTIO -DWRF_RSL_IO -DRSL -DDM_PARALLEL \
                        -DIWORDSIZE=4 -DDWORDSIZE=8 -DRWORDSIZE=$(RWORDSIZE) -DLWORDSIZE=4 -DNETCDF \
                        -DTRIEDNTRUE \
                        -DLIMIT_ARGS -DLANDREAD_STUB
INCLUDE_MODULES =       -module ../main -I../external/io_netcdf -I../external/io_int -I../external/esmf_time_f90 \
                        -I../external -I../frame -I../share -I../phys -I../inc
PERL            =       perl
REGISTRY        =       Registry

# netCDF installation root.  LIB takes its -L path from this variable so the
# location is written down only once.
NETCDFPATH      =       /usr/local/netcdf-3.6.1
LIB             =       -L../external/io_netcdf -lwrfio_nf -L$(NETCDFPATH)/lib -lnetcdf -L../external/RSL/RSL -lrsl \
                        -L../external/io_grib1 -lio_grib1 \
                        -L../external/io_int -lwrfio_int \
                        ../frame/module_internal_header_util.o ../frame/pack_utils.o -L../external/esmf_time_f90 -lesmf_time
LDFLAGS         =       -byteswapio
ENVCOMPDEFS     =
WRF_CHEM        =       0

# C preprocessor used on the Fortran sources.  The backquoted `cat` in
# CPPFLAGS is not run by make; the shell runs it each time a recipe uses
# CPPFLAGS.
CPP             =       /lib/cpp -C -P -traditional
POUND_DEF       =       -DNO_RRTM_PHYSICS  -traditional $(COREDEFS) -DNONSTANDARD_SYSTEM -DF90_STANDALONE -DCONFIG_BUF_LEN=$(CONFIG_BUF_LEN) -DMAX_DOMAINS_F=$(MAX_DOMAINS)
CPPFLAGS        =       -I$(LIBINCLUDE) -C -P $(ARCHFLAGS) -I../external/RSL/RSL -C -P `cat ../inc/dm_comm_cpp_flags` $(ENVCOMPDEFS) $(POUND_DEF)

AR              =       ar ru
M4              =       m4
RANLIB          =       ranlib
CC_TOOLS        =       cc

# Aggregate target: depends on every external library and generated source
# listed here.
externals : \
    wrf_ioapi_includes \
    wrfio_nf \
    wrfio_grib1 \
    ../external/RSL/RSL/librsl.a \
    wrfio_int \
    gen_comms.c \
    module_dm.F \
    esmf_time

# Assemble ../tools/gen_comms.c from ../tools/gen_comms_warning followed by the
# RSL version of gen_comms.c.  The append only runs if the copy succeeded.
# NOTE(review): the recipe writes ../tools/gen_comms.c, not a file named after
# the target in the current directory.
gen_comms.c : ../external/RSL/gen_comms.c
	( /bin/cp ../tools/gen_comms_warning ../tools/gen_comms.c && \
	  cat ../external/RSL/gen_comms.c >> ../tools/gen_comms.c )

# Build module_dm.F in the current directory: module_dm_warning first, then the
# RSL version of module_dm.F appended.  The append only runs if the copy
# succeeded.
module_dm.F : ../external/RSL/module_dm.F
	( /bin/cp module_dm_warning module_dm.F && \
	  cat ../external/RSL/module_dm.F >> module_dm.F )

# Build the netCDF I/O library in ../external/io_netcdf.
# `&&` keeps make from running in the wrong directory if the cd fails, and the
# netCDF location is taken from NETCDFPATH instead of being repeated here.
wrfio_nf :
	( cd ../external/io_netcdf && \
	  make NETCDFPATH="$(NETCDFPATH)" RANLIB="$(RANLIB)" CPP="$(CPP)" FC="$(FC) $(PROMOTION) $(FCFLAGS) -w" TRADFLAG="-traditional" )

# Build target "all" in ../external/io_int.  Both SFC and FC of the sub-build
# are set from this file's SFC, without the FCOPTIM optimization flags.
# `&&` keeps make from running in the wrong directory if the cd fails.
wrfio_int :
	( cd ../external/io_int && \
	  make CC="$(CC)" RANLIB="$(RANLIB)" CPP="$(CPP)" SFC="$(SFC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS)" FC="$(SFC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS) -w" TRADFLAG="-traditional" all )

# Build the GRIB1 I/O library in ../external/io_grib1 using SCC for C and SFC
# for Fortran.
# `&&` keeps make from running in the wrong directory if the cd fails.
wrfio_grib1 :
	( cd ../external/io_grib1 && \
	  make CC="$(SCC)" CFLAGS="$(CFLAGS)" RANLIB="$(RANLIB)" CPP="$(CPP)" FC="$(SFC) $(PROMOTION) -I. $(FCDEBUG) $(FCBASEOPTS) -w" TRADFLAG="-traditional" )

# Build the time-manager library in ../external/esmf_time_f90.  POUND_DEF is
# appended to the preprocessor command handed to the sub-build.
# `&&` keeps make from running in the wrong directory if the cd fails.
esmf_time :
	( cd ../external/esmf_time_f90 && \
	  make FC="$(FC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS)" RANLIB="$(RANLIB)" CPP="$(CPP) -I../../inc -I. $(POUND_DEF)" )

# Build target "linux" in ../external/RSL/RSL, forwarding the domain and
# processor limits defined in this file.
# `&&` keeps make from running in the wrong directory if the cd fails.
../external/RSL/RSL/librsl.a :
	( cd ../external/RSL/RSL && \
	  make CC="$(CC)" FC="$(FC) $(PROMOTION) -byteswapio" MAX_DOMAINS=$(MAX_DOMAINS) MAX_PROC=$(MAX_PROC) LEARN_BCAST=-DLEARN_BCAST linux )

# compile these without high optimization to speed compile
# (the shared recipe below passes FCBASEOPTS to the compiler rather than
# FCFLAGS, so the FCOPTIM flags are left out)
solve_interface.o : solve_interface.F
shift_domain_em.o : shift_domain_em.F
module_io_mm5.o : module_io_mm5.F
module_si_io.o : module_si_io.F
module_io_wrf.o : module_io_wrf.F
module_domain.o : module_domain.F
module_start.o : module_start.F
module_initialize.o : module_initialize.F
module_initialize_b_wave.o : module_initialize_b_wave.F
module_initialize_hill2d_x.o : module_initialize_hill2d_x.F
module_initialize_quarter_ss.o : module_initialize_quarter_ss.F
module_initialize_squall2d_x.o : module_initialize_squall2d_x.F
module_initialize_squall2d_y.o : module_initialize_squall2d_y.F
module_initialize_real.o : module_initialize_real.F
module_dm.o : module_dm.F
start_domain.o : start_domain.F
mediation_integrate.o : mediation_integrate.F
module_configure.o : module_configure.F

# Shared recipe for the objects above: run the sed script over the .F file,
# preprocess the result into a .f file, remove the temporary .b file, then
# compile.
# NOTE(review): MODULE_DIRS is not defined in this file; presumably it is
# supplied by the makefile that includes this one -- confirm.
solve_interface.o                                              \
shift_domain_em.o                                              \
module_io_mm5.o module_si_io.o module_io_wrf.o module_domain.o \
module_start.o module_initialize.o module_initialize_b_wave.o  \
module_initialize_hill2d_x.o module_initialize_quarter_ss.o    \
module_initialize_squall2d_x.o module_initialize_squall2d_y.o  \
mediation_integrate.o \
module_configure.o                                             \
module_initialize_real.o module_dm.o start_domain.o :
	$(RM) $@
	$(SED_FTN) $*.F > $*.b
	$(CPP) -I../inc $(CPPFLAGS) $*.b > $*.f
	$(RM) $*.b
	$(FC) -c $(PROMOTION) $(FCBASEOPTS) $(MODULE_DIRS) $*.f

###########################################################
#
# Generic helper macros; these should be the same on every machine.

LN   = ln -sf
# Sub-makes started through $(MAKE) ignore command errors (-i) and run
# without make's built-in implicit rules (-r).
MAKE = make -i -r
RM   = rm -f

# There is probably no reason to modify these rules

# Run the sub-make in ../external/ioapi_share, forwarding both word-size
# settings.  `&&` keeps make from running in the wrong directory if the cd
# fails.
wrf_ioapi_includes :
	( cd ../external/ioapi_share && \
	  $(MAKE) NATIVE_RWORDSIZE="$(NATIVE_RWORDSIZE)" RWORDSIZE="$(RWORDSIZE)" )

# .F -> .i : preprocess the source (no sed pass), then move the result to
# $(DEVTOP)/pick/<stem>.f and copy the original .F file next to it.
# The .i target itself is moved away by the recipe.
.F.i:
	$(RM) $@
	$(CPP) -I../inc $(CPPFLAGS) $*.F > $@
	mv $*.i $(DEVTOP)/pick/$*.f
	cp $*.F $(DEVTOP)/pick

# .F -> .o : run the sed script over the source into a temporary .b file,
# preprocess that into a .f file, remove the .b file, then compile with the
# full FCFLAGS.  The commented-out lines are a disabled retry of the compile.
.F.o:
	$(RM) $@
	$(SED_FTN) $*.F > $*.b
	$(CPP) -I../inc $(CPPFLAGS) $*.b > $*.f
	$(RM) $*.b
	$(FC) -c $(FCFLAGS) $(MODULE_DIRS) $(PROMOTION) $*.f
# if [ ! -e $@ ] ; then \
# sleep 10 ; $(FC) -c $(FCFLAGS) $(MODULE_DIRS) $(PROMOTION) $*.f ; \
# fi
# if [ ! -e $@ ] ; then \
# sleep 10 ; $(FC) -c $(FCFLAGS) $(MODULE_DIRS) $(PROMOTION) $*.f ; \
# fi

# .F -> .f : run the sed script over the source into a temporary .b file,
# preprocess that into the .f target, then remove the .b file.
.F.f:
	$(RM) $@
	$(SED_FTN) $*.F > $*.b
	$(CPP) -I../inc $(CPPFLAGS) $*.b > $@
	$(RM) $*.b

# .f -> .o : compile an already preprocessed Fortran file with the full
# FCFLAGS.  The commented-out lines are a disabled retry of the compile.
.f.o:
	$(RM) $@
	$(FC) -c $(FCFLAGS) $(PROMOTION) $*.f
# if [ ! -e $@ ] ; then \
# sleep 10 ; $(FC) -c $(FCFLAGS) $(PROMOTION) $*.f ; \
# fi
# if [ ! -e $@ ] ; then \
# sleep 10 ; $(FC) -c $(FCFLAGS) $(PROMOTION) $*.f ; \
# fi

# .c -> .o : remove any old object, then compile the C source with CFLAGS.
.c.o:
	$(RM) $@
	$(CC) -c $(CFLAGS) $*.c


 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mailman.ucar.edu/pipermail/wrf-users/attachments/20070125/34628f21/attachment.html


More information about the Wrf-users mailing list