[Dart-dev] [10236] DART/trunk/observations/gps/shell_scripts: latest updates to handle automating generating gps

nancy at ucar.edu nancy at ucar.edu
Mon May 2 15:27:07 MDT 2016


Revision: 10236
Author:   nancy
Date:     2016-05-02 15:27:06 -0600 (Mon, 02 May 2016)
Log Message:
-----------
latest updates to handle automating generating gps
obs from multiple satellites.

Modified Paths:
--------------
    DART/trunk/observations/gps/shell_scripts/README
    DART/trunk/observations/gps/shell_scripts/do_convert.csh
    DART/trunk/observations/gps/shell_scripts/gpsro_to_obsseq.csh

Added Paths:
-----------
    DART/trunk/observations/gps/shell_scripts/do_convert.lsf

-------------- next part --------------
Modified: DART/trunk/observations/gps/shell_scripts/README
===================================================================
--- DART/trunk/observations/gps/shell_scripts/README	2016-05-02 20:36:53 UTC (rev 10235)
+++ DART/trunk/observations/gps/shell_scripts/README	2016-05-02 21:27:06 UTC (rev 10236)
@@ -4,16 +4,18 @@
 #
 # DART $Id$
 
-Description of the scripts provided to process the COSMIC and 
-CHAMP GPS radio occultation data.
+Description of the scripts provided to process the COSMIC, CHAMP, GRACE and 
+other GPS radio occultation data.
 
 Summary of workflow:  
 1) cd to the ../work directory and run ./quickbuild.csh to compile everything.  
 2) Edit ./gpsro_to_obsseq.csh once to set the directory where the DART
     code is installed, and your CDAAC web site user name and password.
-3) Edit ./do_convert.csh to set the days of data to download/convert/remove.
+3) Edit ./do_convert.csh to set the days of data to download/convert/remove,
+   and what directory you want the work to be done in.
 4) Run ./do_convert.csh
 5) For additional days repeat steps 3 and 4.
+6) For long conversions, edit ./do_convert.lsf and submit it to the batch system.
 
 
 More details:
@@ -61,5 +63,9 @@
 the conversion has succeeded.  But if you have file quota concerns
 this allows you to keep the total disk usage lower.
 
+My recommendation is to run a few days without the autodelete option and
+check the logs carefully.  When it seems to be working correctly, turn 
+the autodelete option on before doing a long run.
 
 
+

Modified: DART/trunk/observations/gps/shell_scripts/do_convert.csh
===================================================================
--- DART/trunk/observations/gps/shell_scripts/do_convert.csh	2016-05-02 20:36:53 UTC (rev 10235)
+++ DART/trunk/observations/gps/shell_scripts/do_convert.csh	2016-05-02 21:27:06 UTC (rev 10236)
@@ -16,13 +16,13 @@
 # -------------------
 
 # set the first and last days.  can roll over month and year boundaries.
-set start_year=2009
-set start_month=9
+set start_year=2010
+set start_month=1
 set start_day=1
 
-set end_year=2009
-set end_month=9
-set end_day=3
+set end_year=2010
+set end_month=1
+set end_day=7
 
 
 # for each day: download the data or not, convert to daily obs_seq files 
@@ -40,22 +40,40 @@
 # - only select one of reprocessed or realtime for a particular
 #   satellite or you will get duplicate observations.
 
+# WARNING: this table is almost certainly out of date.  check the CDAAC web 
+# site for the currently available days.
+#
+# champ2014:  latest reprocessing of original champ data : 2001.138 - 2008.279
+# cnofs:     Air Force C/NOFS : 2010.060 - 2011.365
+# cnofsrt:   C/NOFS realtime : 2012.001 - 2015.193
+# cosmic2013: COSMIC reprocessed : 2006.112 - 2014.120
+# cosmic:    COSMIC : 2014.121 - 2015.150
+# cosmicrt:  COSMIC realtime : 2014.181 - now* (2015.237)
+# gpsmet:    ? : 1995.111 - 1997.047
+# gpsmetas:  ? : 1995.237 - 1997.016
+# champ:     CHAMP : unavailable now
+# grace:     Grace-A : 2007.059 - now* (2015.089)
+# metopa2016:  Metop-A/GRAS reprocessed in 2016 : 2007.273 - 2011.364
+# metopa:    Metop-A/GRAS : 2012.001 - 2015.059
+# metopb:    Metop-B/GRAS : 2013.032 - 2015.059
+# sacc:      Argentinian SAC-C : 2006.068 - 2011.215
+# saccrt:    SAC-C realtime : 2011.329 - 2013.226
+# tsx:       German TerraSAR-X : 2008.041 - now* (2015.058)
+
+# which satellites to include:
 rm -fr satlist
-echo cosmic      >>! satlist  # all 6 COSMIC : 2006.194 - now*
-## echo cosmicrt >>! satlist  # COSMIC : realtime
+echo cnofs       >>! satlist  # new Air Force C/NOFS : 2010.335 - now*
+echo cosmic2013  >>! satlist  # all 6 COSMIC : 2006.194 - now*, reprocessed 2013
+echo grace       >>! satlist  # Grace-A : 2007.059 - now*
+echo metopa2016  >>! satlist  # Metop-A/GRAS : 2008.061 - now*, reprocessed 2016
 echo sacc        >>! satlist  # Argentinan SAC-C : 2006.068 - now*
-## echo saccrt   >>! satlist  # SAC-C : realtime
-echo ncofs       >>! satlist  # new Air Force C/NOFS : 2010.335 - now*
-## echo ncofsrt  >>! satlist  # C/NOFS : realtime
-echo grace       >>! satlist  # Grace-A : 2007.059 - now*
 echo tsx         >>! satlist  # German TerraSAR-X : 2008.041 - now*
-echo metopa      >>! satlist  # Metop-A/GRAS : 2008.061 - now*
-echo champ       >>! satlist  # CHAMP : 2001.139 - 2008.274
 
 
 # where to download the data and do the conversions, relative to
-# this shell_scripts directory.
-set datadir = ../gpsro
+# this shell_scripts directory.  the script below will add YYYYMM
+# to the end of this string.
+set datadir = /glade/p/image/Observations/GPS/staged
 
 # end of things you should have to set in this script
 
@@ -112,7 +130,7 @@
 
   # THE WORK HAPPENS HERE:  call the convert script for each day.
 
-  ./gpsro_to_obsseq.csh ${year}${month}${day} $datadir \
+  ./gpsro_to_obsseq.csh ${year}${month}${day} $datadir/${year}${month} \
                          $do_download $do_convert $do_delete ./satlist
 
 
@@ -120,7 +138,7 @@
   set curday=`echo ${year}${month}${day}00 +1d | ./advance_time`
 
   # advance the loop counter
-  @ d += 1
+  @ d++
  
 end
 

Added: DART/trunk/observations/gps/shell_scripts/do_convert.lsf
===================================================================
--- DART/trunk/observations/gps/shell_scripts/do_convert.lsf	                        (rev 0)
+++ DART/trunk/observations/gps/shell_scripts/do_convert.lsf	2016-05-02 21:27:06 UTC (rev 10236)
@@ -0,0 +1,26 @@
+#!/bin/csh
+#
+# DART software - Copyright 2004 - 2013 UCAR. This open source software is
+# provided by UCAR, "as is", without charge, subject to all terms of use at
+# http://www.image.ucar.edu/DAReS/DART/DART_download
+#
+# DART $Id$
+#
+# wrapper to run the convert script in the batch queue.
+# all the settings are in do_convert.csh
+# submit this script in the directory containing do_convert.csh
+# (usually DART/observations/gps/shell_scripts)
+
+#BSUB -J conv_gps
+#BSUB -o conv_gps_%J.out
+#BSUB -e conv_gps_%J.err
+#BSUB -q caldera
+#BSUB -P xxxxxxxxx
+#BSUB -W 24:00
+#BSUB -n 1
+
+# -------------------
+
+./do_convert.csh
+exit
+


Property changes on: DART/trunk/observations/gps/shell_scripts/do_convert.lsf
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + Date Rev Author HeadURL Id
Added: svn:eol-style
   + native

Modified: DART/trunk/observations/gps/shell_scripts/gpsro_to_obsseq.csh
===================================================================
--- DART/trunk/observations/gps/shell_scripts/gpsro_to_obsseq.csh	2016-05-02 20:36:53 UTC (rev 10235)
+++ DART/trunk/observations/gps/shell_scripts/gpsro_to_obsseq.csh	2016-05-02 21:27:06 UTC (rev 10236)
@@ -1,4 +1,4 @@
-#!/bin/csh
+#!/bin/csh 
 #
 # DART software - Copyright 2004 - 2013 UCAR. This open source software is
 # provided by UCAR, "as is", without charge, subject to all terms of use at
@@ -31,19 +31,25 @@
 #
 # the processing directory name is relative to the 'work' directory.
 #
-# the options for satellite names, and available data times are:
+# CHECK these against the cdaac web site for latest info.  as an
+# example, a snapshot of available data as of aug 2015 is:
 #
-#    cosmic:      all 6 COSMIC : 2006.194 - now*
-#    cosmic-2013: reprocessed COSMIC : 2006.194 - now*
-#    cosmicrt:    COSMIC : realtime
-#    champ:       CHAMP : 2001.139 - 2008.274
-#    grace:       Grace-A : 2007.059 - now*
-#    tsx:         German TerraSAR-X : 2008.041 - now*
-#    metopa:      Metop-A/GRAS : 2008.061 - now*
-#    sacc:        Argentinan SAC-C : 2006.068 - now*
-#    saccrt:      SAC-C : realtime
-#    ncofs:       new Air Force C/NOFS : 2010.335 - now*
-#    ncofsrt:     C/NOFS : realtime
+#    champ2014:  latest reprocessing of original champ data : 2001.138 - 2008.279
+#    champ:     CHAMP : superseded by reprocessed data
+#    cnofs:     Air Force C/NOFS : 2010.060 - 2011.365
+#    cnofsrt:   C/NOFS realtime : 2012.001 - 2015.193
+#    cosmic2013: COSMIC reprocessed : 2006.112 - 2014.120
+#    cosmic:    COSMIC : 2014.121 - 2015.150
+#    cosmicrt:  COSMIC realtime : 2014.181 - now* (2015.237)
+#    gpsmet:    ? : 1995.111 - 1997.047
+#    gpsmetas:  ? : 1995.237 - 1997.016
+#    grace:     Grace-A : 2007.059 - now* (2015.089)
+#    metopa2016:  Metop-A/GRAS reprocessed : 2007.274 - 2015.365
+#    metopa:    Metop-A/GRAS : 2012.001 - 2015.059
+#    metopb:    Metop-B/GRAS : 2013.032 - 2015.059
+#    sacc:      Argentinian SAC-C : 2006.068 - 2011.215
+#    saccrt:    SAC-C realtime : 2011.329 - 2013.226
+#    tsx:       German TerraSAR-X : 2008.041 - now* (2015.058)
 #
 #  - dates are YYYY.DDD where DDD is day number in year
 #  - now* means current date minus 3-4 months.  reprocessed data
@@ -61,43 +67,15 @@
 # 
 #
 # ------- 
-# From the CDAAC web site about the use of 'wget' to download
-# the many files needed to do this process:
+# This script uses the CDAAC web site with 'wget' to download
+# the files needed to do this process.  They are available as
+# a single tar file per satellite per day which contains all
+# available profiles for that day.  For example:
 #
-#   Hints for using wget for fetching CDAAC files from CDAAC:
+#    http://cdaac-www.cosmic.ucar.edu/cdaac/rest/tarservice/data/cosmic/atmPrf/2012.304
+#        -O cosmic_atmPrf_2012.304.tar 
 #   
-#   Here is one recipe for fetching all cosmic real time atmPrf files for one day:
-#   
-#   wget -nd -np -r -l 10 -w 2 --http-user=xxxx --http-passwd=xxxx \
-#         http://cosmic-io.cosmic.ucar.edu/cdaac/login/cosmicrt/level2/atmPrf/2009.007/
-#   
-# (note - now http://cdaac-www.cosmic.ucar.edu/...)
 #
-#   The option -np (no parents) is important. Without it, all manner of 
-#   files from throughout the site will be loaded, I think due to the 
-#   links back to the main page which are everywhere.
-#   
-#   The option -r (recursive fetch) is necessary unless there is just 
-#   one file you want to fetch.
-#   
-#   The option -l 10 (limit of recursive depth to 10 levels) is necessary 
-#   in order to get around the default 5 level depth.
-#   
-#   The option -nd dumps all fetched files into your current directory. 
-#   Otherwise a directory hierarchy will be created: 
-#     cosmic-io.cosmic.ucar.edu/cdaac/login/cosmic/level2/atmPrf/2006.207
-#   
-#   The option -w 2 tells wget to wait two seconds between each file fetch 
-#   so as to have pity on the poor web server.
-# ------- 
-# 
-# note: there are between 1000 and 3000 files per day.  without the -w
-# flag i was getting about 5 files per second (each individual file is
-# relatively small).  but with -w 1 obviously we get slightly less than
-# a file a second, -w 2 is half that again.  this script uses -w 1 by
-# default, but if you are trying to download a lot of days i'd recommend
-# removing it.
-#
 ########################################################################
 
 ########################################################################
@@ -107,23 +85,23 @@
 # utility to download files from the web page.  
 
 # top level directory (root where observations/gps dir is found)
-setenv DART_DIR    /home/user/DART
-set cdaac_user    = username
-set cdaac_pw      = password
+setenv DART_DIR    /glade/p/home/$USER/DART/trunk
+set cdaac_user    = nscollins
+set cdaac_pw      = xxxxxxxx
 
-# old site, still seems to work:
-#set gps_repository_path = 'http://cosmic-io.cosmic.ucar.edu/cdaac/login'
-# new site:
-set gps_repository_path = 'http://cdaac-www.cosmic.ucar.edu/cdaac/login'
+# CDAAC web site path:
+set gps_repository_path = 'http://cdaac-www.cosmic.ucar.edu/cdaac/rest/tarservice/data'
 
 setenv DART_WORK_DIR  ${DART_DIR}/observations/gps/work
 setenv CONV_PROG      convert_cosmic_gps_cdf
 setenv DATE_PROG      advance_time
 
-# if you are in a hurry, this has no delay between requests:
-#set wget_cmd           = 'wget -q -nd -np -r -l 10 -w 0'
-set wget_cmd            = 'wget -q -nd -np -r -l 10 -w 1'
+# wget seems to print a lot of output showing progress.
+# this flag tries to print less often but i haven't found
+# the way to turn it off completely.
+set wget_cmd            = 'wget --progress=dot:mega '
 
+# this helps with debugging
 set chatty=yes
 
 if ($# != 6) then
@@ -262,13 +240,14 @@
       if ( $chatty == 'yes' ) then
          echo 'copying data files from satellite: ' ${sat}
       endif
-      ${get} ${gps_repository_path}/${sat}/level2/atmPrf/${yyyy}.${mday}/
-      rm -f *.html *.txt
+      # new, tar file per day!
+      echo ${get} ${gps_repository_path}/${sat}/atmPrf/${yyyy}.${mday} -O ${sat}_atmPrf_${yyyy}.${mday}.tar
+      ${get} ${gps_repository_path}/${sat}/atmPrf/${yyyy}.${mday} -O ${sat}_atmPrf_${yyyy}.${mday}.tar
    end
    rm input.nml
    
    if ( $chatty == 'yes' ) then
-      echo `/bin/ls . | grep _nc | wc -l` 'raw files downloaded at ' `date`
+      echo `/bin/ls *_atmPrf_${yyyy}.${mday}.tar | wc -l` 'tar files downloaded at ' `date`
    endif
    
    cd ${DART_WORK_DIR}
@@ -284,34 +263,57 @@
    if ( $chatty == 'yes' ) then
       echo 'starting gpsro conversion at ' `date`
    endif
-   
-   rm -f flist
+     
    set yyyy    = `echo $datea | cut -b1-4`
+   set yyyymm  = `echo $datea | cut -b1-6`
    set jyyyydd = `echo ${datea}00 0 -j | ./${DATE_PROG}`
    @ mday = $jyyyydd[2] + 1000  ;  set mday = `echo $mday | cut -b2-4`
    echo 'converting obs for date: ' $datea
+
+   cd ${datea}
+   cp ../input.nml .
+   echo 'current dir now ' `pwd`
+
+   # make sure directory has no leftovers from before
+   rm -fr atmPrf*_nc flist obs_seq.gpsro
+
+   foreach sat ( `cat ../$satlist` )
+     echo 'converting obs for satellite: ' $sat
+
+     set next_tarfile = ${sat}_atmPrf_${yyyy}.${mday}.tar 
+
+     if ( ! -e $next_tarfile || -z $next_tarfile ) then
+       echo $next_tarfile NOT DOWNLOADED or ZERO LENGTH, SKIPPING
+       continue
+     else
+       echo untarring $next_tarfile into daily profiles
+     endif
+
+     tar --strip-components=3 -xf $next_tarfile
+     /bin/ls -1 atmPrf_*.${yyyy}.${mday}.*_nc >! flist
+     
+     set nfiles = `cat flist | wc -l`
+     if ( $chatty == 'yes' ) then
+      echo $nfiles $sat ' profiles to process for day ' $datea 
+     endif
    
-   /bin/ls -1 ${datea}/*.${yyyy}.${mday}.*_nc >! flist
-   
-   set nfiles = `cat flist | wc -l`
-   if ( $chatty == 'yes' ) then
-      echo $nfiles ' to process for file ' $datea 
-   endif
-   
-   ./${CONV_PROG} >>! convert_output_log
+     ../${CONV_PROG} >>! ../convert_output_log
 
-   rm -rf flist
-   #rm -rf cosmic_gps_input.nc flist
+     # keep tar files but remove individual profile files so next
+     # pass doesn't add them into the output file.
+     rm -rf flist atmPrf_*.${yyyy}.${mday}.*_nc 
+   end
+
    if ( -e obs_seq.gpsro ) then
-       mv obs_seq.gpsro obs_seq.gpsro_${datea}
-   
-      if ( $chatty == 'yes' ) then
-         echo "all observations for day in file obs_seq.gpsro_${datea} at " `date`
-      endif
+     mv obs_seq.gpsro ../obs_seq.gpsro_${datea}
+     
+     if ( $chatty == 'yes' ) then
+       echo "all observations for day in file obs_seq.gpsro_${datea} at " `date`
+     endif
    else
-      if ( $chatty == 'yes' ) then
-         echo "no obs found for date ${datea}, or conversion failed.'
-      endif
+     if ( $chatty == 'yes' ) then
+       echo "no obs found for date ${datea}, or conversion failed."
+     endif
    endif
 
    cd ${DART_WORK_DIR}


More information about the Dart-dev mailing list