[Dart-dev] [3219] DART/branches/nancy_work/obs_sequence: My test
branch: updated merge code should squeeze out unused observations
nancy at subversion.ucar.edu
nancy at subversion.ucar.edu
Thu Feb 7 17:04:53 MST 2008
An HTML attachment was scrubbed...
URL: http://mailman.ucar.edu/pipermail/dart-dev/attachments/20080207/36ca94db/attachment.html
-------------- next part --------------
Modified: DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90
===================================================================
--- DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90 2008-02-07 17:42:46 UTC (rev 3218)
+++ DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90 2008-02-08 00:04:53 UTC (rev 3219)
@@ -23,7 +23,7 @@
insert_obs_in_seq, get_num_copies, get_num_qc, &
get_copy_meta_data, get_qc_meta_data, set_qc_meta_data, &
destroy_obs, destroy_obs_sequence, delete_seq_head, &
- delete_seq_tail
+ delete_seq_tail, get_num_key_range, set_copy_meta_data
implicit none
@@ -33,19 +33,16 @@
revision = "$Revision$", &
revdate = "$Date$"
-type(obs_sequence_type) :: seq1, seq2
+type(obs_sequence_type) :: seq_in, seq_out
type(obs_type) :: obs, prev_obs, next_obs, new_obs
logical :: is_there_one, is_this_last
-integer :: size_seq1, num_copies1, num_qc1
-integer :: size_seq2, num_copies2, num_qc2
-integer :: size_seq, num_copies, num_qc
-integer :: add_size_seq = 0
-integer :: add_copies = 0, add_qc = 0
-integer :: num_inserted, iunit, io, i, total_num_inserted
-integer :: max_num_obs, file_id
-character(len = 129) :: read_format
-logical :: pre_I_format
-logical :: all_gone
+integer :: size_seq_in, num_copies_in, num_qc_in
+integer :: size_seq_out, num_copies_out, num_qc_out
+integer :: num_inserted, iunit, io, i, j, total_num_inserted
+integer :: max_num_obs, file_id, first_seq_index, remaining_obs_count
+character(len = 129) :: read_format, meta_data
+logical :: pre_I_format, all_gone, first_seq
+logical :: trim_first, trim_last
character(len = 129) :: msgstring
!----------------------------------------------------------------
@@ -57,6 +54,7 @@
character(len = 129) :: filename_seq(max_num_input_files)
character(len = 129) :: filename_out = 'obs_seq.merged'
+logical :: process_file(max_num_input_files)
! Time of first and last observations to be used from obs_sequence
! If negative, these are not used
@@ -104,15 +102,40 @@
! Read header information for the sequences to see if we need
! to accomodate additional copies or qc values from subsequent sequences.
-! Also, calculate how many observation to be added to the first sequence.
-add_copies = 0
-add_qc = 0
-add_size_seq = 0
+! Also, calculate how many observations to be added to the first sequence.
+num_copies_out = 0
+num_qc_out = 0
+size_seq_out = 0
-! Note for future enhancement:
-! create an empty header in all cases and place obs in it. that sorts
-! them and allows us to trim the file if not all the obs are being used.
+! check to see if we are going to trim the sequence by time
+if(first_obs_seconds >= 0 .or. first_obs_days >= 0) then
+ first_obs_time = set_time(first_obs_seconds, first_obs_days)
+ trim_first = .true.
+else
+ trim_first = .false.
+endif
+if(last_obs_seconds >= 0 .or. last_obs_days >= 0) then
+ last_obs_time = set_time(last_obs_seconds, last_obs_days)
+ trim_last = .true.
+else
+ trim_last = .false.
+endif
+if (trim_first .and. trim_last) then
+ if (first_obs_time > last_obs_time) then
+ call error_handler(E_ERR,'merge_obs_seq', 'first time later than last time', &
+ source,revision,revdate)
+ endif
+endif
+! TWO PASS algorithm; open each file, trim it if requested, and count
+! the number of actual observations. then the output file can be
+! created with the correct size, and as observations are put into it
+! they'll be sorted, and unused obs will be removed.
+
+! pass 1:
+
+first_seq_index = -1
+first_seq = .true.
do i = 1, num_input_files
if ( len(filename_seq(i)) .eq. 0 .or. filename_seq(i) .eq. "" ) then
@@ -120,106 +143,108 @@
'num_input_files and filename_seq mismatch',source,revision,revdate)
endif
- if ( i .eq. 1) then
- ! for the header of the first seq
- call read_obs_seq_header(filename_seq(i), num_copies1, num_qc1, size_seq1, &
- max_num_obs, file_id, read_format, pre_I_format, close_the_file = .true.)
+ ! if some of the files are completely removed, you can ignore them.
+ call read_obs_seq_header(filename_seq(i), num_copies_in, num_qc_in, size_seq_in, &
+ max_num_obs, file_id, read_format, pre_I_format, close_the_file = .true.)
+ ! if not trimming observations, just read the headers because it is faster.
+ if (trim_first .or. trim_last) then
+ call read_obs_seq(filename_seq(i), 0, 0, 0, seq_in)
+ call trim_seq(seq_in, trim_first, first_obs_time, trim_last, last_obs_time, &
+ filename_seq(i), .true., remaining_obs_count)
+ call destroy_obs_sequence(seq_in)
+ if (remaining_obs_count == 0) then
+ process_file(i) = .false.
+ cycle
+ else
+ process_file(i) = .true.
+ endif
+ size_seq_in = remaining_obs_count
+ endif
+
+
+ if ( first_seq ) then
+ first_seq_index = i
+ first_seq = .false.
+ num_copies_out = num_copies_in
+ num_qc_out = num_qc_in
+ size_seq_out = size_seq_in
else
- call read_obs_seq_header(filename_seq(i), num_copies2, num_qc2, size_seq2, &
- max_num_obs, file_id, read_format, pre_I_format, close_the_file = .true.)
- add_size_seq = add_size_seq + size_seq2
- if (num_copies2 > num_copies1) then
- add_copies = num_copies2 - num_copies1
- num_copies1 = num_copies2
+ ! some simple error checking here; first, counts must match.
+ ! later we will verify that the metadata contents themselves match.
+ if (num_copies_out /= num_copies_in) then
+ write(msgstring,51) 'Number of data copies inconsistent, ', &
+ num_copies_in, ' does not equal ', num_copies_out
+ call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
endif
- if (num_qc2 > num_qc1) then
- add_qc = num_qc2 - num_qc1
- num_qc1 = num_qc2
+ if (num_qc_out /= num_qc_in) then
+ write(msgstring,51) 'Number of QC copies inconsistent, ', &
+ num_qc_in, ' does not equal ', num_qc_out
+ call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
endif
+ size_seq_out = size_seq_out + size_seq_in
endif
+51 format (A,I4,A,I4)
enddo
-! Read 1st obs seq, expand its size for insertion
-call read_obs_seq(filename_seq(1), add_copies, add_qc, add_size_seq, seq1)
-
-! Need to find first obs with appropriate time, delete all earlier ones
-if(first_obs_seconds >= 0 .or. first_obs_days >= 0) then
- first_obs_time = set_time(first_obs_seconds, first_obs_days)
- call delete_seq_head(first_obs_time, seq1, all_gone)
- if(all_gone) then
- msgstring = 'All obs in sequence are before first_obs_days:first_obs_seconds'
+! no valid obs found?
+if (first_seq_index == -1) then
+ msgstring = 'All obs in all files are empty or were deleted by the first/last times'
call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
- endif
endif
-! Also get rid of observations past the last_obs_time if requested
-if(last_obs_seconds >= 0 .or. last_obs_days >= 0) then
- last_obs_time = set_time(last_obs_seconds, last_obs_days)
- call delete_seq_tail(last_obs_time, seq1, all_gone)
- if(all_gone) then
- msgstring = 'All obs in sequence are after last_obs_days:last_obs_seconds'
- call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
- endif
-endif
+! pass 2:
-size_seq = get_num_obs(seq1) ! does not include size_seq2
-num_copies = get_num_copies(seq1) ! already includes add_copies
-num_qc = get_num_qc(seq1) ! already includes add_qc
-
! Initialize individual observation variables
-call init_obs( obs, num_copies, num_qc)
-call init_obs( new_obs, num_copies, num_qc)
-call init_obs(next_obs, num_copies, num_qc)
-call init_obs(prev_obs, num_copies, num_qc)
+call init_obs( obs, num_copies_out, num_qc_out)
+call init_obs( new_obs, num_copies_out, num_qc_out)
+call init_obs(next_obs, num_copies_out, num_qc_out)
+call init_obs(prev_obs, num_copies_out, num_qc_out)
total_num_inserted = 0
-! Read obs seq to be added, and insert obs from it to the first seq
-do i = 2, num_input_files
+! Read obs seq to be added, and insert obs from it to the output seq
+do i = 1, num_input_files
- call read_obs_seq(filename_seq(i), 0, 0, 0, seq2)
+ if (.not. process_file(i)) cycle
+
+ call read_obs_seq(filename_seq(i), 0, 0, 0, seq_in)
! Need to find first obs with appropriate time, delete all earlier ones
- if(first_obs_seconds >= 0 .or. first_obs_days >= 0) then
- first_obs_time = set_time(first_obs_seconds, first_obs_days)
- call delete_seq_head(first_obs_time, seq2, all_gone)
- if(all_gone) then
- msgstring = 'Skipping: all obs in ' // trim(filename_seq(i)) // &
- ' are before first_obs_days:first_obs_seconds'
- call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
- call destroy_obs_sequence(seq2)
- cycle ! skip this input file and do the next (if there are any)
- endif
+ call trim_seq(seq_in, trim_first, first_obs_time, trim_last, last_obs_time, &
+ filename_seq(i), .false., remaining_obs_count)
+ if(remaining_obs_count == 0) then
+ call destroy_obs_sequence(seq_in)
+ cycle ! skip this input file and do the next (if there are any)
endif
- ! Also get rid of observations past the last_obs_time if requested
- if(last_obs_seconds >= 0 .or. last_obs_days >= 0) then
- last_obs_time = set_time(last_obs_seconds, last_obs_days)
- call delete_seq_tail(last_obs_time, seq2, all_gone)
- if(all_gone) then
- msgstring = 'Skipping: all obs in ' // trim(filename_seq(i)) // &
- ' are after last_obs_days:last_obs_seconds'
- call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
- call destroy_obs_sequence(seq2)
- cycle ! skip this input file and do the next (if there are any)
- endif
+ ! create the output sequence here
+ if (i == first_seq_index) then
+ call init_obs_sequence(seq_out, num_copies_out, num_qc_out, size_seq_out)
+ do j=1, num_copies_out
+ meta_data = get_copy_meta_data(seq_in, j)
+ call set_copy_meta_data(seq_out, j, meta_data)
+ enddo
+ do j=1, num_qc_out
+ meta_data = get_qc_meta_data(seq_in, j)
+ call set_qc_meta_data(seq_out, j, meta_data)
+ enddo
endif
- size_seq1 = get_num_obs(seq1) !current size of seq1
- size_seq2 = get_num_obs(seq2) !current size of seq2
+ size_seq_out = get_num_key_range(seq_out) !current size of seq_out
+ size_seq_in = get_num_key_range(seq_in) !current size of seq_in
! Compare metadata between the observation sequences.
! If it is compatible, keep going, if not ... it will terminate here.
- call compare_metadata(seq1, seq2)
+ call compare_metadata(seq_out, seq_in)
call error_handler(E_MSG,'merge_obs_seq', &
'Good news - observation sequence files compatible ...', source,revision,revdate)
! Getting the time of the last observation in the first sequence to see
! if we can append instead of insert. Appending is MUCH faster.
- is_there_one = get_last_obs(seq1, obs)
- if (.not. is_there_one .and. size_seq1 >= 1) then
+ is_there_one = get_last_obs(seq_out, obs)
+ if (.not. is_there_one .and. size_seq_out >= 1) then
call error_handler(E_ERR,'merge_obs_seq', &
'BAD news - first obs_seq is neverending ...', source,revision,revdate)
endif
@@ -231,38 +256,38 @@
! Must pass a copy of incoming obs to insert_obs_in_seq.
!--------------------------------------------------------------
num_inserted = 0
- is_there_one = get_first_obs(seq2, obs)
+ is_there_one = get_first_obs(seq_in, obs)
if ( is_there_one ) then
- new_obs = obs ! obs records position in seq2
+ new_obs = obs ! obs records position in seq_out
- call insert_obs_in_seq(seq1, new_obs) ! new_obs linked list info changes
+ call insert_obs_in_seq(seq_out, new_obs) ! new_obs linked list info changes
- prev_obs = new_obs ! records new position in seq1
+ prev_obs = new_obs ! records new position in seq_in
num_inserted = num_inserted + 1
- call get_next_obs(seq2, obs, next_obs, is_this_last)
+ call get_next_obs(seq_in, obs, next_obs, is_this_last)
ObsLoop : do while ( .not. is_this_last)
if (mod(num_inserted,1000) == 0) then
- print*, 'inserted number ',num_inserted,' of ',size_seq2
+ print*, 'inserted number ',num_inserted,' of ',size_seq_in
endif
- obs = next_obs ! essentially records position in seq2
- new_obs = obs ! will be modified w/ position in seq1
+ obs = next_obs ! essentially records position in seq_out
+ new_obs = obs ! will be modified w/ position in seq_in
! Since the stride through the observation sequence file is always
! guaranteed to be in temporally-ascending order, we can use the
! 'previous' observation as the starting point to search for the
! correct insertion point.
- call insert_obs_in_seq(seq1, new_obs, prev_obs)
+ call insert_obs_in_seq(seq_out, new_obs, prev_obs)
- prev_obs = new_obs ! update position in seq1 for next insert
+ prev_obs = new_obs ! update position in seq_in for next insert
num_inserted = num_inserted + 1
- call get_next_obs(seq2, obs, next_obs, is_this_last)
+ call get_next_obs(seq_in, obs, next_obs, is_this_last)
enddo ObsLoop
@@ -274,25 +299,23 @@
endif
print*, '-------------- Obs seq file # : ', i
- print*, 'Number of obs in previous seq : ', size_seq1
- print*, 'Number of obs to be inserted : ', size_seq2
+ print*, 'Number of obs in previous seq : ', size_seq_out
+ print*, 'Number of obs to be inserted : ', size_seq_in
print*, 'Number of obs really inserted : ', num_inserted
print*, '---------------------------------------------------------'
- call destroy_obs_sequence(seq2)
+ call destroy_obs_sequence(seq_in)
enddo
-print*, 'Number of obs in first seq : ', size_seq
print*, 'Total number of obs inserted : ', total_num_inserted
-print*, 'Target number of obs in the new seq file :', size_seq + total_num_inserted
-print*, 'Actual number of obs in the new seq file :', get_num_obs(seq1)
+print*, 'Actual number of obs in the new seq file :', get_num_key_range(seq_out)
-call write_obs_seq(seq1, filename_out)
+call write_obs_seq(seq_out, filename_out)
! Time to clean up
-call destroy_obs_sequence(seq1)
+call destroy_obs_sequence(seq_out)
call destroy_obs( obs)
call destroy_obs( new_obs)
call destroy_obs(next_obs)
@@ -336,11 +359,9 @@
character(len=255) :: msgstring
num_qc1 = get_num_qc( seq1)
-size_seq1 = get_num_obs( seq1)
num_copies1 = get_num_copies(seq1)
num_qc2 = get_num_qc( seq2)
-size_seq2 = get_num_obs( seq2)
num_copies2 = get_num_copies(seq2)
num_copies = num_copies1
@@ -399,5 +420,48 @@
end subroutine compare_metadata
+! pass in an already opened sequence and a start/end time. this routine
+! really trims the observations out of the sequence, and returns a count
+! of how many remain.
+subroutine trim_seq(seq, trim_first, first_time, trim_last, last_time, seqfilename, &
+ print_msg, remaining_obs_count)
+ type(obs_sequence_type), intent(inout) :: seq
+ logical, intent(in) :: trim_first, trim_last
+ type(time_type), intent(in) :: first_time, last_time
+ character(len = *), intent(in) :: seqfilename
+ logical, intent(in) :: print_msg
+ integer, intent(out) :: remaining_obs_count
+ ! Need to find first obs with appropriate time, delete all earlier ones
+ if(trim_first) then
+ call delete_seq_head(first_time, seq, all_gone)
+ if(all_gone) then
+ if (print_msg) then
+ msgstring = 'Skipping: all obs in ' // trim(seqfilename) // &
+ ' are before first_obs_days:first_obs_seconds'
+ call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
+ endif
+ remaining_obs_count = 0
+ return
+ endif
+ endif
+
+ ! Also get rid of observations past the last_obs_time if requested
+ if(trim_last) then
+ call delete_seq_tail(last_time, seq, all_gone)
+ if(all_gone) then
+ if (print_msg) then
+ msgstring = 'Skipping: all obs in ' // trim(seqfilename) // &
+ ' are after last_obs_days:last_obs_seconds'
+ call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
+ endif
+ remaining_obs_count = 0
+ return
+ endif
+ endif
+
+ remaining_obs_count = get_num_key_range(seq)
+
+end subroutine trim_seq
+
end program merge_obs_seq
Modified: DART/branches/nancy_work/obs_sequence/obs_sequence_mod.f90
===================================================================
--- DART/branches/nancy_work/obs_sequence/obs_sequence_mod.f90 2008-02-07 17:42:46 UTC (rev 3218)
+++ DART/branches/nancy_work/obs_sequence/obs_sequence_mod.f90 2008-02-08 00:04:53 UTC (rev 3219)
@@ -52,7 +52,8 @@
delete_obs_from_seq, set_copy_meta_data, set_qc_meta_data, get_first_obs, &
get_last_obs, add_copies, add_qc, write_obs_seq, read_obs_seq, &
append_obs_to_seq, get_obs_from_key, get_obs_time_range, set_obs, get_time_range_keys, &
- get_num_times, static_init_obs_sequence, destroy_obs_sequence, read_obs_seq_header, &
+ get_num_times, get_num_key_range, &
+ static_init_obs_sequence, destroy_obs_sequence, read_obs_seq_header, &
get_expected_obs, delete_seq_head, delete_seq_tail, get_next_obs_from_key, get_prev_obs_from_key
! Public interfaces for obs
@@ -1364,7 +1365,7 @@
! Whole sequence is after
all_gone = .false.
else
- ! Whole sequence is before
+ ! Whole sequence is before; but sequence is not altered?
all_gone = .true.
endif
! Destroy temp storage and return
@@ -1372,6 +1373,15 @@
return
endif
+! compare num_keys with all possible keys in file; if equal, you have
+! also removed all obs and should return all_gone = .true.
+if (num_keys == get_num_key_range(seq)) then
+ all_gone = .true.
+ ! Destroy temp storage and return
+ call destroy_obs(obs)
+ return
+endif
+
! If here, then there are a set of observations that are not being used at beginning
! Delete them from the sequence
all_gone = .false.
@@ -1443,6 +1453,15 @@
return
endif
+! compare num_keys with all possible keys in file; if equal, you have
+! also removed all obs and should return all_gone = .true.
+if (num_keys == get_num_key_range(seq)) then
+ all_gone = .true.
+ ! Destroy temp storage and return
+ call destroy_obs(obs)
+ return
+endif
+
! If here, then there are a set of observations that are not being used at the end
! Delete them from the sequence
all_gone = .false.
@@ -1855,7 +1874,53 @@
end function get_num_times
+!---------------------------------------------------------
+function get_num_key_range(seq, key1, key2)
+
+! Returns number of observations between the two given keys
+
+type(obs_sequence_type), intent(in) :: seq
+integer, optional, intent(in) :: key1, key2
+integer :: get_num_key_range
+
+integer :: next, last
+
+
+if (present(key1)) then
+ if (key1 < seq%first_time .or. key1 > seq%last_time) then
+ write(msg_string, *) 'Bad value for key1, must be between ', &
+ seq%first_time, ' and ', seq%last_time
+ call error_handler(E_ERR, 'get_num_key_range', msg_string, &
+ source, revision, revdate)
+ endif
+ next = key1
+else
+ next = seq%first_time
+endif
+if (present(key2)) then
+ if (key2 < seq%first_time .or. key2 > seq%last_time) then
+ write(msg_string, *) 'Bad value for key2, must be between ', &
+ seq%first_time, ' and ', seq%last_time
+ call error_handler(E_ERR, 'get_num_key_range', msg_string, &
+ source, revision, revdate)
+ endif
+ last = key2
+else
+ last = seq%last_time
+endif
+
+! count them up
+get_num_key_range = 0
+do while (next /= -1)
+ get_num_key_range = get_num_key_range + 1
+ if (next == last) exit
+ next = seq%obs(next)%next_time
+end do
+
+end function get_num_key_range
+
+
!-------------------------------------------------
!subroutine get_cov_group
!-------------------------------------------------
Modified: DART/branches/nancy_work/obs_sequence/obs_sequence_mod.html
===================================================================
--- DART/branches/nancy_work/obs_sequence/obs_sequence_mod.html 2008-02-07 17:42:46 UTC (rev 3218)
+++ DART/branches/nancy_work/obs_sequence/obs_sequence_mod.html 2008-02-08 00:04:53 UTC (rev 3219)
@@ -1075,6 +1075,38 @@
</TABLE>
<BR>
+<!--============= DESCRIPTION OF A FUNCTION ========================-->
+ <A NAME="get_num_key_range"></A>
+ <P></P><HR><P></P>
+ <div class=routine>
+ <em class=call> var = get_num_key_range(seq, key1, key2) </em>
+ <pre>
+ integer :: <em class=code>get_num_key_range</em>
+ type(obs_sequence_type), intent(in) :: <em class=code>seq</em>
+ integer, optional, intent(in) :: <em class=code>key1, key2</em>
+ </pre></div>
+ <H3 class=indent1>Description</H3>
+ <P>
+Returns the number of observations between the two given keys.
+The default key numbers are the first and last in the sequence file.
+This routine can be used to count the actual number of observations
+in a sequence and will be accurate even if the sequence has been
+trimmed with delete_seq_head() or delete_seq_tail().
+ </P>
+ <TABLE width=100% border=0 summary="" celpadding=3>
+ <TR><TD valign=top><em class=code>var </em></TD>
+ <TD>Number of unique times for observations in a sequence</TD></TR>
+ <TR><TD valign=top><em class=code>seq </em></TD>
+ <TD>An observation sequence</TD></TR>
+ <TR><TD valign=top><em class=code>key1 </em></TD>
+ <TD>The starting key number. Defaults to the first observation
+ in the sequence.</TD></TR>
+ <TR><TD valign=top><em class=code>key2 </em></TD>
+ <TD>The ending key number. Defaults to the last observation
+ in the sequence.</TD></TR>
+ </TABLE>
+ <BR>
+
<!--============= DESCRIPTION OF A SUBROUTINE =======================-->
<A NAME="static_init_obs_sequence"></A>
<P></P><HR><P></P>
More information about the Dart-dev
mailing list