[Dart-dev] [5705] DART/branches/development/obs_sequence/obs_selection.f90: one more try at making this faster - only go into mask file if we

nancy at ucar.edu nancy at ucar.edu
Wed Apr 18 10:03:30 MDT 2012


Revision: 5705
Author:   nancy
Date:     2012-04-18 10:03:30 -0600 (Wed, 18 Apr 2012)
Log Message:
-----------
one more try at making this faster - only go into mask file if we
know this is an obs type of interest.  also fix a mem leak - if we
left the main processing loop early i didn't delete the existing
obs_sequence structure.  added timestamps in several places (namelist
controlled) in case we encounter another slow case so i can see where
the time is going.  and finally, rename some internal variables to try
to stem the kinds/types confusion.

Modified Paths:
--------------
    DART/branches/development/obs_sequence/obs_selection.f90

-------------- next part --------------
Modified: DART/branches/development/obs_sequence/obs_selection.f90
===================================================================
--- DART/branches/development/obs_sequence/obs_selection.f90	2012-04-17 22:22:54 UTC (rev 5704)
+++ DART/branches/development/obs_sequence/obs_selection.f90	2012-04-18 16:03:30 UTC (rev 5705)
@@ -30,7 +30,7 @@
                              find_namelist_in_file, check_namelist_read, &
                              error_handler, E_ERR, E_MSG, nmlfileunit,   &
                              do_nml_file, do_nml_term, get_next_filename, &
-                             open_file, close_file
+                             open_file, close_file, finalize_utilities
 use     location_mod, only : location_type, get_location, set_location, &
                              LocationName, read_location, operator(==), &
                              write_location
@@ -72,10 +72,11 @@
 logical                 :: is_there_one, is_this_last
 integer                 :: size_seq_in, num_copies_in, num_qc_in
 integer                 :: size_seq_out, num_copies_out, num_qc_out
-integer                 :: num_inserted, iunit, io, i, j
+integer                 :: num_inserted, iunit, io, i, j, ocount
 integer                 :: total_num_inserted, base_index, next_base_index
-integer                 :: max_num_obs, file_id
-integer                 :: first_seq
+integer                 :: max_num_obs, file_id, this_type
+logical, allocatable    :: type_wanted(:)
+integer                 :: first_seq, num_good_called, num_good_searched
 character(len = 129)    :: read_format, meta_data
 logical                 :: pre_I_format, cal
 character(len = 255)    :: msgstring, msgstring1, msgstring2, msgstring3
@@ -103,12 +104,15 @@
 
 logical  :: selections_is_obs_seq = .false.
 logical  :: print_only            = .false.
+logical  :: partial_write         = .false.
+logical  :: print_timestamps      = .false.
 character(len=32) :: calendar     = 'Gregorian'
 
 
 namelist /obs_selection_nml/ &
          num_input_files, filename_seq, filename_seq_list, filename_out, &
-         selections_file, selections_is_obs_seq, print_only, calendar
+         selections_file, selections_is_obs_seq, print_only, calendar,   &
+         print_timestamps, partial_write
 
 !----------------------------------------------------------------
 ! Start of the program:
@@ -147,10 +151,14 @@
 call set_calendar_type(calendar)
 cal = (get_calendar_type() /= NO_CALENDAR)
 
-call read_selection_list(selections_file, selections_is_obs_seq, obs_def_list, obs_def_count)
+call read_selection_list(selections_file, selections_is_obs_seq, obs_def_list, obs_def_count, type_wanted)
 
 ! end of namelist processing and setup
 
+! some statistics for timing/debugging
+num_good_called  = 0
+num_good_searched = 0
+
 ! Read header information for the sequences to see if we need
 ! to accomodate additional copies or qc values from subsequent sequences.
 ! Also, calculate how many observations to be added to the first sequence.
@@ -165,6 +173,8 @@
 
 ! pass 1:
 
+if (print_timestamps) call timestamp(string1='start of pass1', pos='brief')
+
 first_seq = -1
 do i = 1, num_input_files
 
@@ -173,19 +183,13 @@
          'num_input_files and filename_seq mismatch',source,revision,revdate)
    endif
 
-   ! count up the number of observations we are going to eventually have.
-   ! if all the observations in a file are not part of the linked list, the
-   ! output number of observations might be much smaller than the total size in
-   ! the header.  it is slower, but go ahead and read in the entire sequence
-   ! and count up the real number of obs - trim_seq will do the count even if
-   ! it is not trimming in time.  this allows us to create an empty obs_seq
-   ! output file of exactly the right size.
+   ! count up the max number of observations possible if every obs in the
+   ! input file was copied to the output.
 
    call read_obs_seq_header(filename_seq(i), num_copies_in, num_qc_in, &
       size_seq_in, max_num_obs, file_id, read_format, pre_I_format, &
       close_the_file = .true.)
 
-   call destroy_obs_sequence(seq_in)
    if (max_num_obs == 0) then
       process_file(i) = .false.
       write(msgstring,*) 'No obs in input sequence file ', trim(filename_seq(i))
@@ -208,6 +212,8 @@
 
 enddo
 
+if (print_timestamps) call timestamp(string1='end   of pass1', pos='brief')
+ 
 ! no valid obs found?  if the index value is still negative, we are
 ! still waiting to process the first one and never found one.
 if (first_seq < 0 .or. size_seq_out == 0) then
@@ -218,6 +224,8 @@
 
 ! pass 2:
 
+if (print_timestamps) call timestamp(string1='start of pass2', pos='brief')
+
 ! blank line, start of actually creating output file
 call error_handler(E_MSG,' ',' ')
 
@@ -235,8 +243,12 @@
 
    if (.not. process_file(i)) cycle
 
+   write(msgstring, *) 'input file ', i
+   if (print_timestamps) call timestamp(string1='start of '//trim(msgstring), pos='brief')
+
    write(msgstring,*) 'Starting to process input sequence file ', trim(filename_seq(i))
    call error_handler(E_MSG,'obs_selection',msgstring)
+   call timestamp(' at ', pos='brief')
 
    call read_obs_seq(filename_seq(i), 0, 0, 0, seq_in)
 
@@ -278,44 +290,74 @@
    ! NOTE: insert_obs_in_seq CHANGES the obs passed in.
    !       Must pass a copy of incoming obs to insert_obs_in_seq.
    !--------------------------------------------------------------
+   num_good_called = 0
+   num_good_searched = 0
+
    num_inserted = 0
    is_there_one = get_first_obs(seq_in, obs_in)
 
-   if ( is_there_one )  then
+   if ( .not. is_there_one )  then
+      write(msgstring2,*)  'no valid observations in ',trim(filename_seq(i))
+      call error_handler(E_MSG,'obs_selection', 'skipping to next input file', &
+              source, revision, revdate, text2=msgstring2)
 
-      ! figure out the time of the first obs in this file, and set the
-      ! offset for the first item in the selection file that's at this time.
-      t1 = get_time_from_obs(obs_in)
-      base_index = set_base(t1, obs_def_list, obs_def_count)
+      call destroy_obs_sequence(seq_in)
 
-      if (base_index < 0) then
-         write(msgstring2, *) 'skipping all obs in ', trim(filename_seq(i))
-         call error_handler(E_MSG, 'obs_selection', &
-             'first time in obs_sequence file is after all times in selection file', &
-              source, revision, revdate, text2=msgstring2)
-         cycle FILES      
-      endif
-      next_base_index = base_index
+      write(msgstring, *) 'input file ', i
+      if (print_timestamps) call timestamp(string1='end   of '//trim(msgstring), pos='brief')
 
-      if (good_selection(obs_in, obs_def_list, obs_def_count, next_base_index)) then
-         obs_out = obs_in
+      cycle FILES
+   endif
 
-         call insert_obs_in_seq(seq_out, obs_out)  ! new_obs linked list info changes
+   if (print_timestamps) call timestamp(string1='start of first obs', pos='brief')
+   ocount = 1
 
-         prev_obs_out = obs_out            ! records new position in seq_out
-         num_inserted = num_inserted + 1
-      endif
+   ! figure out the time of the first obs in this file, and set the
+   ! offset for the first item in the selection file that's at this time.
+   t1 = get_time_from_obs(obs_in)
+   base_index = set_base(t1, obs_def_list, obs_def_count)
 
-      call get_next_obs(seq_in, obs_in, next_obs_in, is_this_last)
-      ObsLoop : do while ( .not. is_this_last)
+   if (base_index < 0) then
+      write(msgstring2, *) 'skipping all obs in ', trim(filename_seq(i))
+      call error_handler(E_MSG, 'obs_selection', &
+          'first time in obs_sequence file is after all times in selection file', &
+           source, revision, revdate, text2=msgstring2)
+      call destroy_obs_sequence(seq_in)
+      write(msgstring, *) 'input file ', i
+      if (print_timestamps) call timestamp(string1='cyc 1 of '//trim(msgstring), pos='brief')
+      cycle FILES      
+   endif
+   next_base_index = base_index
 
+   if (good_selection(obs_in, obs_def_list, obs_def_count, next_base_index)) then
+      obs_out = obs_in
+
+      call insert_obs_in_seq(seq_out, obs_out)  ! new_obs linked list info changes
+
+      prev_obs_out = obs_out            ! records new position in seq_out
+      num_inserted = num_inserted + 1
+   endif
+
+   call get_next_obs(seq_in, obs_in, next_obs_in, is_this_last)
+   ObsLoop : do while ( .not. is_this_last)
+
+      ocount = ocount + 1
+
+      ! before we fool with checking times and setting offsets into
+      ! the selection list, skip until we are handling an obs type 
+      ! that we care about.
+      this_type = get_type_from_obs(next_obs_in)
+
+      ! support identity obs - cannot exit early for them.
+      if (this_type < 0 .or. type_wanted(this_type)) then
+
          ! if the time of the next obs is different from this one, bump
          ! up the start of the search index offset.
-         t1 = get_time_from_obs(obs_in)
          t2 = get_time_from_obs(next_obs_in)
          if (t1 /= t2) then
             base_index = next_base_index
             next_base_index = set_base(t2, obs_def_list, obs_def_count, base_index)
+            t1 = t2
   
             if (next_base_index < 0) then
                ! next obs in selection file is after all the rest of the obs in this
@@ -323,15 +365,16 @@
                write(msgstring, *) 'done with ', trim(filename_seq(i))
                call error_handler(E_MSG, 'obs_selection', msgstring, &
                     source, revision, revdate)
+               call destroy_obs_sequence(seq_in)
+               write(msgstring, *) 'input file ', i
+               if (print_timestamps) call timestamp(string1='cyc 2 of '//trim(msgstring), pos='brief')
                cycle FILES      
             endif
          endif
 
-         obs_in     = next_obs_in   ! next obs is the current one now
+         if (good_selection(next_obs_in, obs_def_list, obs_def_count, next_base_index)) then
+            obs_out = next_obs_in
 
-         if (good_selection(obs_in, obs_def_list, obs_def_count, next_base_index)) then
-            obs_out = obs_in
-
             ! Since the stride through the observation sequence file is always
             ! guaranteed to be in temporally-ascending order, we can use the
             ! 'previous' observation as the starting point to search for the
@@ -356,28 +399,43 @@
 
          endif
 
-         call get_next_obs(seq_in, obs_in, next_obs_in, is_this_last)
+      endif
 
+      obs_in     = next_obs_in   ! next obs is the current one now
 
-      enddo ObsLoop
+      call get_next_obs(seq_in, obs_in, next_obs_in, is_this_last)
 
-      total_num_inserted = total_num_inserted + num_inserted
+      if (print_timestamps) then
+         if (mod(ocount,10000) == 0) then
+            write(msgstring, *) 'processed obs ', ocount, ' of ', size_seq_in
+            if (print_timestamps) call timestamp(string1=trim(msgstring), pos='brief')
+         endif
+      endif
 
-   else
-      write(msgstring,*)'no first observation in ',trim(filename_seq(i))
-      call error_handler(E_MSG,'obs_selection', msgstring)
-   endif
+   enddo ObsLoop
 
+   total_num_inserted = total_num_inserted + num_inserted
+
+
    if (.not. print_only) then
       print*, '--------------  Obs seq file # :          ', i
       print*, 'Number of obs in previous seq  :          ', size_seq_out
-      print*, 'Number of obs to be  inserted  :          ', size_seq_in
-      print*, 'Number of obs really inserted  :          ', num_inserted
+      print*, 'Number of obs possible to get  :          ', size_seq_in
+      print*, 'Number of obs really accepted  :          ', num_inserted
       print*, '---------------------------------------------------------'
+      if (partial_write) call write_obs_seq(seq_out, filename_out)
    endif
 
    call destroy_obs_sequence(seq_in)
 
+   write(msgstring, *) 'input file ', i
+   if (print_timestamps) call timestamp(string1='end   of '//trim(msgstring), pos='brief')
+
+   ! DEBUG diagnostics
+   !print *, 'size of mask file = ',  obs_def_count
+   !print *, 'average search length = ', num_good_searched / num_good_called
+   !print *, 'number of time search called = ', num_good_called
+
 enddo FILES
 
 write(msgstring,*) 'Starting to process output sequence file ', trim(filename_out)
@@ -406,7 +464,7 @@
 call destroy_obs(     obs_out)
 !call destroy_obs(prev_obs_out)
 
-call timestamp(source,revision,revdate,'end')
+call finalize_utilities()
 
 contains
 
@@ -632,19 +690,16 @@
 logical                 :: is_there_one, is_this_last
 integer                 :: size_seq_in
 integer                 :: i
-integer                 :: this_obs_kind
+integer                 :: this_obs_type
 ! max_obs_kinds is a public from obs_kind_mod.f90 and really is
-! counting the max number of types, not kinds
+! counting the max number of types, not kinds.
 integer                 :: type_count(max_obs_kinds), identity_count
 
 
 ! Initialize input obs_types
-do i = 1, max_obs_kinds
-   type_count(i) = 0
-enddo
+type_count(:) = 0
 identity_count = 0
 
-! make sure there are obs left to process before going on.
 ! num_obs should be ok since we just constructed this seq so it should
 ! have no unlinked obs.  if it might for some reason, use this instead:
 ! size_seq_in = get_num_key_range(seq_in)     !current size of seq_in
@@ -687,12 +742,11 @@
 
 ObsLoop : do while ( .not. is_this_last)
 
-   call get_obs_def(obs, this_obs_def)
-   this_obs_kind = get_obs_kind(this_obs_def)
-   if (this_obs_kind < 0) then
+   this_obs_type = get_type_from_obs(obs)
+   if (this_obs_type < 0) then
       identity_count = identity_count + 1
    else
-      type_count(this_obs_kind) = type_count(this_obs_kind) + 1
+      type_count(this_obs_type) = type_count(this_obs_type) + 1
    endif
 
    call get_next_obs(seq_in, obs, next_obs, is_this_last)
@@ -852,11 +906,12 @@
 
 !---------------------------------------------------------------------
 subroutine read_selection_list(select_file, select_is_seq, &
-                               selection_list, selection_count)
+                               selection_list, selection_count, type_wanted)
  character(len=*),                intent(in)  :: select_file
  logical,                         intent(in)  :: select_is_seq
  type(obs_def_type), allocatable, intent(out) :: selection_list(:)
  integer,                         intent(out) :: selection_count
+ logical,            allocatable, intent(out) :: type_wanted(:)
 
  ! the plan:
  ! open file
@@ -864,7 +919,7 @@
  ! call read_obs_def right number of times
  ! close file
 
- integer :: iunit, count, i, copies, qcs
+ integer :: iunit, count, i, copies, qcs, this_type
  character(len=15) :: label    ! must be 'num_definitions'
  type(obs_type) :: obs, prev_obs
  type(obs_sequence_type) :: seq_in
@@ -890,15 +945,21 @@
      ! set up the mapping table for the kinds here
      call read_obs_kind(iunit, .false.)
     
-     ! this one stays around and is a return from this subroutine
-     allocate(selection_list(count))
+     ! these ones stay around and are returned from this subroutine
+     allocate(selection_list(count), type_wanted(max_obs_kinds))
   
      ! these are temporaries for sorting into time order
      allocate(temp_sel_list(count), temp_time(count), sort_index(count))
     
+     ! assume no types are wanted
+     type_wanted(:) = .false.
+
      ! read into array in whatever order these are in the file
+     ! and bookkeep what types are encountered
      do i = 1, count
          call read_obs_def(iunit, temp_sel_list(i), 0, dummy)
+         this_type = get_obs_kind(temp_sel_list(i))
+         if (this_type > 0) type_wanted(this_type) = .true.
      enddo
     
      ! extract just the times into an array
@@ -945,6 +1006,8 @@
          if (is_this_last) exit 
 
          call get_obs_def(obs, selection_list(i))
+         this_type = get_obs_kind(selection_list(i))
+         if (this_type > 0) type_wanted(this_type) = .true.
 
          prev_obs = obs
          call get_next_obs(seq_in, prev_obs, obs, is_this_last)
@@ -1049,6 +1112,9 @@
        source, revision, revdate, text2=msgstring2) 
  endif
  
+ ! statistics for timing/debugging
+ num_good_called = num_good_called + 1
+
  ! the obs_def list is time-sorted now.  if you get to a larger
  ! time without a match you can bail early.
  good_selection = .false.
@@ -1061,7 +1127,10 @@
     test_obs_time = get_obs_def_time(selection_list(i))
 
     ! if past the possible times, return now.
-    if (base_obs_time > test_obs_time) return
+    if (base_obs_time > test_obs_time) then
+       num_good_searched = i - startindex + 1
+       return
+    endif
 
     if (base_obs_time /= test_obs_time) cycle
 
@@ -1072,11 +1141,13 @@
     if ( .not. horiz_location_equal(base_obs_loc, test_obs_loc)) cycle
 
     ! all match - good return.
+    num_good_searched = i - startindex + 1
     good_selection = .true.
     return
  enddo
 
  ! if you get here, no match, and return value is already false.
+num_good_searched = selection_count - startindex + 1
 
 end function good_selection
 
@@ -1126,6 +1197,18 @@
 
 end function get_time_from_obs
 
+!---------------------------------------------------------------------------
+function get_type_from_obs(this_obs)
+  type(obs_type), intent(in) :: this_obs
+  integer :: get_type_from_obs
+
+type(obs_def_type) :: this_obs_def
+
+call get_obs_def(this_obs, this_obs_def)
+get_type_from_obs = get_obs_kind(this_obs_def)
+
+end function get_type_from_obs
+
 !---------------------------------------------------------------------
 end program obs_selection
 


More information about the Dart-dev mailing list