[Dart-dev] [7075] DART/trunk/obs_sequence: at the request of soyoung, i added support in this tool for matching

nancy at ucar.edu nancy at ucar.edu
Fri Jul 18 14:24:50 MDT 2014


Revision: 7075
Author:   nancy
Date:     2014-07-18 14:24:49 -0600 (Fri, 18 Jul 2014)
Log Message:
-----------
at the request of soyoung, i added support in this tool for matching
in the vertical as well as horizontal, and to let the user specify
tolerances for how close is 'close enough'.  she had observations
which were supposed to be the same, but had been processed by
different software and the locations varied a bit more than just
epsilon, but should have been considered the same location.
letting her specify a lat/lon tolerance solved that problem.
seemed generally useful so i'm committing this to the trunk.

Modified Paths:
--------------
    DART/trunk/obs_sequence/obs_selection.f90
    DART/trunk/obs_sequence/obs_selection.html
    DART/trunk/obs_sequence/obs_selection.nml

-------------- next part --------------
Modified: DART/trunk/obs_sequence/obs_selection.f90
===================================================================
--- DART/trunk/obs_sequence/obs_selection.f90	2014-07-18 20:00:34 UTC (rev 7074)
+++ DART/trunk/obs_sequence/obs_selection.f90	2014-07-18 20:24:49 UTC (rev 7075)
@@ -29,7 +29,9 @@
                              open_file, close_file, finalize_utilities
 use     location_mod, only : location_type, get_location, set_location, &
                              LocationName, read_location, operator(==), &
-                             write_location
+                             write_location, VERTISSURFACE, VERTISHEIGHT, &
+                             VERTISLEVEL, VERTISPRESSURE, VERTISSCALEHEIGHT, &
+                             VERTISUNDEF, query_location
 use      obs_def_mod, only : obs_def_type, get_obs_def_time, get_obs_kind, &
                              get_obs_def_location, read_obs_def
 use     obs_kind_mod, only : max_obs_kinds, get_obs_kind_name, get_obs_kind_index, &
@@ -97,8 +99,17 @@
 logical              :: process_file(max_num_input_files)
 
 character(len = 129) :: selections_file = 'obsdef_mask.txt'
+logical              :: selections_is_obs_seq = .false.
 
-logical  :: selections_is_obs_seq = .false.
+! max differences allowed when deciding a location is the same
+real(r8) :: latlon_tolerance      = 0.000001 ! horizontal, degrees
+real(r8) :: surface_tolerance     = 0.0001   ! vertical, meters
+real(r8) :: pressure_tolerance    = 0.001    ! vertical, pascals
+real(r8) :: height_tolerance      = 0.0001   ! vertical, meters
+real(r8) :: scaleheight_tolerance = 0.001    ! vertical, pressure ratio
+real(r8) :: level_tolerance       = 0.00001  ! vertical, fractional model levels
+
+logical  :: match_vertical        = .false.
 logical  :: print_only            = .false.
 logical  :: partial_write         = .false.
 logical  :: print_timestamps      = .false.
@@ -108,7 +119,9 @@
 namelist /obs_selection_nml/ &
          num_input_files, filename_seq, filename_seq_list, filename_out, &
          selections_file, selections_is_obs_seq, print_only, calendar,   &
-         print_timestamps, partial_write
+         print_timestamps, partial_write, latlon_tolerance,              &
+         surface_tolerance, pressure_tolerance, height_tolerance,        &
+         scaleheight_tolerance, level_tolerance, match_vertical
 
 !----------------------------------------------------------------
 ! Start of the program:
@@ -1134,7 +1147,7 @@
     if (base_obs_type /= test_obs_type) cycle
 
     test_obs_loc = get_obs_def_location(selection_list(i))
-    if ( .not. horiz_location_equal(base_obs_loc, test_obs_loc)) cycle
+    if ( .not. same_location(base_obs_loc, test_obs_loc, match_vertical)) cycle
 
     ! all match - good return.
     num_good_searched = i - startindex + 1
@@ -1160,27 +1173,73 @@
 end subroutine destroy_selections
 
 !---------------------------------------------------------------------------
-function horiz_location_equal(loc1,loc2)
+function same_location(loc1,loc2,vert_flag)
 
-! function to compare only the lat & lon and ignore the vert location.
+! compares two locations.
+! the vert_flag specifies whether to consider only the horizontal values
+! of lat/lon, or whether to include the vertical coordinate.
+!
+! if including the vertical, the two coordinate systems must be
+! the same (pressure, height, level, surface, scaleheight, undef).
+! the tolerances for saying the two locations are the same is a namelist
+! item setting.  horizontal values are in degrees.  if that tolerance 
+! is specified as <= 0, allow the lowest bit to differ from roundoff error.
+! if vert_flag is true, vert must also match within tolerance to return same.
+! if vert_flag is false, just use the horizontal.
 
 type(location_type), intent(in) :: loc1, loc2
-logical                         :: horiz_location_equal
+logical, intent(in)             :: vert_flag
+logical                         :: same_location
 
-real(r8) :: l1(3), l2(3)
+real(r8) :: larray1(3), larray2(3)
+integer  :: whichv1, whichv2
 
-l1 = get_location(loc1)
-l2 = get_location(loc2)
+larray1 = get_location(loc1)
+larray2 = get_location(loc2)
 
-horiz_location_equal = .false.
+! strategy is to assume the locations don't match, and when
+! you know this you can immediately return.  if you get all
+! the way to the end of this routine, then they are the same.
 
-if ( abs(l1(1)  - l2(1) ) > epsilon(l1(1) ) ) return
-if ( abs(l1(2)  - l2(2) ) > epsilon(l1(2) ) ) return
+same_location = .false.
 
-horiz_location_equal = .true.
+if (latlon_tolerance <= 0.0_r8) then
+   if ( abs(larray1(1) - larray2(1)) > epsilon(larray1(1)) ) return
+   if ( abs(larray1(2) - larray2(2)) > epsilon(larray1(2)) ) return
+else
+   if ( abs(larray1(1) - larray2(1)) > latlon_tolerance ) return
+   if ( abs(larray1(2) - larray2(2)) > latlon_tolerance ) return
+endif
 
-end function horiz_location_equal
+if (vert_flag) then
+   whichv1 = nint(query_location(loc1))
+   whichv2 = nint(query_location(loc2))
+   if (whichv1 /= whichv2) return
 
+   select case(whichv1) 
+      case (VERTISUNDEF)
+         continue
+      case (VERTISSURFACE)
+         if ( abs(larray1(3) - larray2(3)) > surface_tolerance ) return
+      case (VERTISLEVEL)
+         if ( abs(larray1(3) - larray2(3)) > level_tolerance ) return
+      case (VERTISPRESSURE)
+         if ( abs(larray1(3) - larray2(3)) > pressure_tolerance ) return
+      case (VERTISHEIGHT)
+         if ( abs(larray1(3) - larray2(3)) > height_tolerance ) return
+      case (VERTISSCALEHEIGHT)
+         if ( abs(larray1(3) - larray2(3)) > scaleheight_tolerance ) return
+      case default
+         write(msgstring, *) 'unrecognized key for vertical type: ', whichv1
+         call error_handler(E_ERR, 'same_location', msgstring, source, revision, revdate)
+   end select
+ 
+endif
+
+same_location = .true.
+
+end function same_location
+
 !---------------------------------------------------------------------------
 function get_time_from_obs(this_obs)
   type(obs_type), intent(in) :: this_obs

Modified: DART/trunk/obs_sequence/obs_selection.html
===================================================================
--- DART/trunk/obs_sequence/obs_selection.html	2014-07-18 20:00:34 UTC (rev 7074)
+++ DART/trunk/obs_sequence/obs_selection.html	2014-07-18 20:24:49 UTC (rev 7075)
@@ -36,16 +36,28 @@
 <H2>Overview</H2>
 
 <P>
-This specialized tool selects out a subset of the input observations.
-For a more general purpose tool, see the
+This specialized tool selects a subset of input observations
+from an observation sequence file.
+For a more general purpose observation sequence file tool, see the
 <a href="obs_sequence_tool.html">obs_sequence_tool</a>.
-The tool which creates the input selection file is
-<a href="obs_seq_coverage.html">obs_seq_coverage</a>.
 This tool takes a selected list of observation
 types, times, and locations, and extracts only the matching
-observations out of a longer set of obs_sequence files.
+observations out of one or more obs_sequence files.
+The tool which creates the input selection file is usually
+<a href="obs_seq_coverage.html">obs_seq_coverage</a>.
+Alternatively, the selection file can be a full observation
+sequence file, in which case the types, times, and locations
+of those observations are used as the selection criteria.
 </P>
 <P>
+This tool processes each observation sequence file listed
+in the input namelist <em class=code>filename_seq</em> or 
+<em class=code>filename_seq_list</em>.  If the observation 
+type, time and location matches an entry in the selection
+file, it is copied through to the output.
+Otherwise it is ignored.
+</P>
+<P>
 The actions of the <em class=code>obs_selection</em> program
 are controlled by a Fortran namelist, read from a file named
 <em class=file>input.nml</em> in the current directory.  A detailed
@@ -76,9 +88,19 @@
    filename_seq          = 'obs_seq.out', 
    filename_seq_list     = '', 
    filename_out          = 'obs_seq.processed', 
+   num_input_files       = 0,
    selections_file       = 'obsdef_mask.txt', 
    selections_is_obs_seq = .false.,
+   match_vertical        = .false.,
+   latlon_tolerance      = 0.000001,
+   surface_tolerance     = 0.0001,
+   pressure_tolerance    = 0.001,
+   height_tolerance      = 0.0001,
+   scaleheight_tolerance = 0.001,
+   level_tolerance       = 0.00001,
    print_only            = .false., 
+   partial_write         = .false.,
+   print_timestamps      = .false.,
    calendar              = "Gregorian",
 /
 </pre>
@@ -117,19 +139,91 @@
     <TD>Optional.  The number of observation sequence files to process.
 Maximum of 500.  If 0, the length is set by the number of input files
 given.  If non-zero, must match the given input file list length.
+(Can be used to verify the right number of input files were processed.)
 </TD></TR>
 
 <TR><TD>filename_out</TD>
     <TD>character(len=129)</TD>
     <TD>The name of the resulting output observation sequence file.
+There is only a single output file from this tool.
+If the input specifies multiple obs_seq input files, the results are concatinated 
+into a single output file.
 </TD></TR>
 
 <TR><TD>selections_file</TD>
     <TD>character(len=129)</TD>
     <TD>The name of the input file containing the mask of observation
 definitions (the textfile output of <a href="obs_seq_coverage.html">obs_seq_coverage</a>).
+Alternatively, this can be the name of a full observation sequence file.
+In this case, the types, times, and locations are extracted from this
+file and then used in the same manner as a mask file from the coverage tool.
 </TD></TR>
 
+<TR><TD>selections_is_obs_seq</TD>
+    <TD>logical</TD>
+    <TD>If .TRUE. the filename given for the "selections_file" is a full
+obs_sequence file and not a text file from the coverage tool.
+</TD></TR>
+
+<TR><TD>latlon_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in degrees. 
+For observations to match in the
+horizontal the difference in degrees for each of latitude and longitude
+must be less than this threshold. If less than or equal to 0, the
+values must match exactly.
+</TD></TR>
+
+<TR><TD>match_vertical</TD>
+    <TD>logical</TD>
+    <TD>If .TRUE. the locations of the observations in the input files have 
+to match the selection list not only the horizontal but also in the vertical.
+</TD></TR>
+
+<TR><TD>surface_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in meters. If "match_vertical" is .FALSE. this value is
+ignored.  If "match_vertical" is .TRUE., this applies to observations with 
+a vertical type of VERTISSURFACE.  For observations which match in the
+horizontal, the vertical surface elevation difference must be less than 
+this to be considered the same.
+</TD></TR>
+
+<TR><TD>pressure_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in pascals. If "match_vertical" is .FALSE. this value is
+ignored.  If "match_vertical" is .TRUE., this applies to observations with 
+a vertical type of VERTISPRESSURE.  For observations which match in the
+horizontal, the vertical difference must be less than this to be considered the same.
+</TD></TR>
+
+<TR><TD>height_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in meters. If "match_vertical" is .FALSE. this value is
+ignored.  If "match_vertical" is .TRUE., this applies to observations with 
+a vertical type of VERTISHEIGHT.  For observations which match in the
+horizontal, the vertical difference must be less than this to be considered the same.
+</TD></TR>
+
+<TR><TD>scaleheight_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in unitless values. If "match_vertical" is .FALSE. this value is
+ignored.  If "match_vertical" is .TRUE., this applies to observations with 
+a vertical type of VERTISSCALEHEIGHT.  For observations which match in the
+horizontal, the vertical difference must be less than this to be considered the same.
+</TD></TR>
+
+<TR><TD>level_tolerance</TD>
+    <TD>real(r8)</TD>
+    <TD>Specified in fractional model levels. If "match_vertical" is .FALSE. this value is
+ignored.  If "match_vertical" is .TRUE., this applies to observations with 
+a vertical type of VERTISLEVEL.  For observations which match in the
+horizontal, the vertical difference must be less than this to be considered the same.
+Note that some models only support integer level values, but others support
+fractional levels.  The vertical value in an observation is a floating point/real
+value, so fractional levels are possible to specify for an observation.
+</TD></TR>
+
 <TR><TD>print_only</TD>
     <TD>logical</TD>
     <TD>If .TRUE. do not create an output file, but print a summary of the
@@ -137,6 +231,25 @@
 of observations and types which would have been created in an output file.
 </TD></TR>
 
+<TR><TD>partial_write</TD>
+    <TD>logical</TD>
+    <TD>Generally only used for debugging problems.  After each input obs_seq
+file is processed, this flag, if .TRUE., causes the code to write out the 
+partial results to the output file.  The default is to process all input
+files (if more than a single file is specified) and write the output file
+only at the end of the processing.
+</TD></TR>
+
+<TR><TD>print_timestamps</TD>
+    <TD>logical</TD>
+    <TD>Generally only used for debugging very slow execution runs.
+This flag, if .TRUE., causes the code to output timestamps (wall clock
+time) at various locations during the processing phases.  It may help
+isolate where particularly slow execution times are occurring.
+For very large input files, or long lists of input files, it can also
+help to estimate what the eventual run time of the job will be.
+</TD></TR>
+
 <TR><TD>calendar</TD>
     <TD>character(len=32)</TD>
     <TD>Set to the name of the calendar; only controls the
@@ -174,8 +287,12 @@
 </P>
 <P>Generally the directories where executables are built will include
 a "quickbuild.csh" script which will build and run preprocess and then
-build the rest of the executables.  The "input.nml" namelists will need
-to be edited to include all the required observation types first.
+build the rest of the executables.  The "input.nml" namelist file may 
+need to be edited to include all the required observation types first.
+If this tool is not built, you must add a "path_names_obs_selection"
+and "mkmf_obs_selection" file in the current directory.  One can be
+copied from another model and edited to be consistent with the model
+you are building.
 </P>
 
 
@@ -258,9 +375,12 @@
 Long laundry list of things this tool could do, including:
 </P>
 <UL>
-<LI>Remove duplicates (difficult to accurately compare floating
-    point values, but identical obs should be possible to recognize,
-    or some epsilon could be specified.)</LI>
+<LI>Remove duplicates. Lots of clever bookkeeping will be
+needed to make sure this doesn't run excruciatingly slow.</LI>
+<LI>General superob functions could be added - all observations
+within a given tolerance could be combined.  Hard questions are
+how to specify the error of the combined observation, and how 
+to combine obs near or over the poles.</LI>
 </UL>
 
 <!--==================================================================-->

Modified: DART/trunk/obs_sequence/obs_selection.nml
===================================================================
--- DART/trunk/obs_sequence/obs_selection.nml	2014-07-18 20:00:34 UTC (rev 7074)
+++ DART/trunk/obs_sequence/obs_selection.nml	2014-07-18 20:24:49 UTC (rev 7075)
@@ -1,13 +1,32 @@
 
-# selections_file is a list of obs_defs output
-# from the obs_seq_coverage utility.
+# selections_file is a list of obs_defs output from the obs_seq_coverage 
+# utility.  (optionally can be a full obs_seq file.)
+#
+# tolerances in the horizontal are in degrees, and apply separately
+# to longitude and latitude.  tolerances in the vertical are in:
+#   surface: meters
+#   pressure: pascals
+#   height: meters
+#   scaleheight: pressure ratio
+#   levels: fractional model levels
+#
 
 &obs_selection_nml
-   filename_seq          = 'obs_seq.out', 
-   filename_seq_list     = '', 
-   filename_out          = 'obs_seq.processed', 
-   selections_file       = 'obsdef_mask.txt', 
-   selections_is_obs_seq = .false.,
-   print_only            = .false., 
-   calendar              = "Gregorian",
+   filename_seq          = 'obs_seq.out'
+   filename_seq_list     = ''
+   filename_out          = 'obs_seq.processed'
+   num_input_files       = 0
+   selections_file       = 'obsdef_mask.txt'
+   selections_is_obs_seq = .false.
+   latlon_tolerance      = 0.000001
+   match_vertical        = .false.
+   surface_tolerance     = 0.0001
+   pressure_tolerance    = 0.001
+   height_tolerance      = 0.0001
+   scaleheight_tolerance = 0.001
+   level_tolerance       = 0.00001
+   print_only            = .false.
+   partial_write         = .false.
+   print_timestamps      = .false.
+   calendar              = 'Gregorian'
 /


More information about the Dart-dev mailing list