[Dart-dev] [3335] DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90: My test branch only.
nancy at subversion.ucar.edu
Wed May 14 09:23:44 MDT 2008
Added: DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90
===================================================================
--- DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90 (rev 0)
+++ DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90 2008-05-14 15:23:44 UTC (rev 3335)
@@ -0,0 +1,498 @@
+! Data Assimilation Research Testbed -- DART
+! Copyright 2004-2007, Data Assimilation Research Section
+! University Corporation for Atmospheric Research
+! Licensed under the GPL -- www.gpl.org/licenses/gpl.html
+
+!! NOTE: this version has the start of support for restricting the obs
+!! types to a list of specific type names. the namelist section is set
+!! up, but the counting routines still count all types, so the two-pass
+!! part which figures out the exact size and allocates only that much
+!! space will be wrong (too large; not a fatal error, but it means that
+!! if only a few obs are extracted from a big file, a lot of memory is
+!! wasted when the file is read in). the code to actually select on obs
+!! type is not there yet; it affects which obs becomes the 'first' one
+!! to be inserted. the loop that copies over the 'next obs' seems
+!! easier; if restricting, just skip obs of the wrong types. a type
+!! matching subroutine is needed to keep the code clean.
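+!!
+!! a minimal sketch of what such a helper might look like (illustrative
+!! only -- obs_type_wanted is not part of this file yet, and a real
+!! version would live in the contains section below):
+!!
+!!   logical function obs_type_wanted(obs_type_name)
+!!     character(len=*), intent(in) :: obs_type_name
+!!     integer :: n
+!!     ! accept everything when no restriction list was given
+!!     obs_type_wanted = .true.
+!!     if (.not. restrict_by_obs_type) return
+!!     ! otherwise accept only names found in the namelist list
+!!     obs_type_wanted = .false.
+!!     do n = 1, num_obs_input_types
+!!        if (trim(obs_type_name) == trim(obs_input_types(n))) &
+!!           obs_type_wanted = .true.
+!!     enddo
+!!   end function obs_type_wanted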
+
+program merge_obs_seq
+
+! <next few lines under version control, do not edit>
+! $URL$
+! $Id: merge_obs_seq.f90 3325 2008-04-30 20:17:26Z nancy $
+! $Revision$
+! $Date: 2008-04-30 14:17:26 -0600 (Wed, 30 Apr 2008) $
+
+use types_mod, only : r8
+use utilities_mod, only : timestamp, register_module, initialize_utilities, &
+ find_namelist_in_file, check_namelist_read, &
+ error_handler, E_ERR, E_MSG, nmlfileunit
+use time_manager_mod, only : time_type, operator(>), print_time, set_time
+use obs_sequence_mod, only : obs_sequence_type, obs_type, write_obs_seq, &
+ init_obs, init_obs_sequence, static_init_obs_sequence, &
+ read_obs_seq_header, read_obs_seq, assignment(=), &
+ get_num_obs, get_first_obs, get_last_obs, get_next_obs, &
+ insert_obs_in_seq, get_num_copies, get_num_qc, &
+ get_copy_meta_data, get_qc_meta_data, set_qc_meta_data, &
+ destroy_obs, destroy_obs_sequence, delete_seq_head, &
+ delete_seq_tail, get_num_key_range, set_copy_meta_data
+
+implicit none
+
+! <next few lines under version control, do not edit>
+character(len=128), parameter :: &
+ source = "$URL$", &
+ revision = "$Revision$", &
+ revdate = "$Date: 2008-04-30 14:17:26 -0600 (Wed, 30 Apr 2008) $"
+
+type(obs_sequence_type) :: seq_in, seq_out
+type(obs_type) :: obs, prev_obs, next_obs, new_obs
+logical :: is_there_one, is_this_last
+integer :: size_seq_in, num_copies_in, num_qc_in
+integer :: size_seq_out, num_copies_out, num_qc_out
+integer :: num_inserted, iunit, io, i, j, total_num_inserted
+integer :: max_num_obs, file_id, remaining_obs_count
+integer :: first_seq
+character(len = 129) :: read_format, meta_data
+logical :: pre_I_format, all_gone
+logical :: trim_first, trim_last
+character(len = 129) :: msgstring
+
+!----------------------------------------------------------------
+! Namelist input with default values
+
+! max_num_input_files : maximum number of input sequence files to be merged
+integer, parameter :: max_num_input_files = 50
+integer :: num_input_files
+
+! lazy, pick a big number
+integer, parameter :: max_obs_input_types = 5000
+character(len = 32) :: obs_input_types(max_obs_input_types)
+logical :: restrict_by_obs_type
+integer :: num_obs_input_types
+
+character(len = 129) :: filename_seq(max_num_input_files)
+character(len = 129) :: filename_out = 'obs_seq.merged'
+logical :: process_file(max_num_input_files)
+
+! Time of first and last observations to be used from obs_sequence
+! If negative, these are not used
+integer :: first_obs_days = -1
+integer :: first_obs_seconds = -1
+integer :: last_obs_days = -1
+integer :: last_obs_seconds = -1
+type(time_type) :: first_obs_time, last_obs_time
+
+namelist /merge_obs_seq_nml/ num_input_files, filename_seq, filename_out, &
+ first_obs_days, first_obs_seconds, last_obs_days, last_obs_seconds, &
+ obs_input_types
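+
+! an example &merge_obs_seq_nml entry in input.nml might look like this
+! (the filenames and values below are purely illustrative):
+!
+!   &merge_obs_seq_nml
+!      num_input_files   = 2,
+!      filename_seq      = 'obs_seq.one', 'obs_seq.two',
+!      filename_out      = 'obs_seq.merged',
+!      first_obs_days    = -1,
+!      first_obs_seconds = -1,
+!      last_obs_days     = -1,
+!      last_obs_seconds  = -1,
+!      obs_input_types   = ''
+!   /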
+
+!----------------------------------------------------------------
+! Start of the routine.
+! Each input observation sequence file is opened in turn and its
+! observations are 'inserted' into the output observation sequence.
+! Each observation is independently inserted or appended.
+! Inserting takes time; appending is much faster when possible.
+!----------------------------------------------------------------
+
+call merge_obs_seq_modules_used()
+
+! Initialize input obs_seq filenames and obs_types
+do i = 1, max_num_input_files
+ filename_seq(i) = ""
+enddo
+do i = 1, max_obs_input_types
+ obs_input_types(i) = ""
+enddo
+
+! Read the namelist entry
+call find_namelist_in_file("input.nml", "merge_obs_seq_nml", iunit)
+read(iunit, nml = merge_obs_seq_nml, iostat = io)
+call check_namelist_read(iunit, io, "merge_obs_seq_nml")
+
+if(num_input_files .gt. max_num_input_files) then
+ call error_handler(E_ERR,'merge_obs_seq', &
+ 'num_input_files > max_num_input_files. change max_num_input_files in source file', &
+ source,revision,revdate)
+endif
+
+! Record the namelist values used for the run ...
+write(nmlfileunit, nml=merge_obs_seq_nml)
+write( * , nml=merge_obs_seq_nml)
+
+! See if the user is restricting the obs types to be merged, and set up
+! the values if so.
+num_obs_input_types = 0
+do i = 1, max_obs_input_types
+ if ( len_trim(obs_input_types(i)) .eq. 0 .or. obs_input_types(i) .eq. "" ) exit
+ num_obs_input_types = i
+enddo
+if (num_obs_input_types == 0) then
+ restrict_by_obs_type = .false.
+else
+ restrict_by_obs_type = .true.
+endif
+
+! Read header information for the sequences to see if we need
+! to accommodate additional copies or qc values from subsequent sequences.
+! Also, calculate how many observations are to be added to the first sequence.
+num_copies_out = 0
+num_qc_out = 0
+size_seq_out = 0
+
+! check to see if we are going to trim the sequence by time
+if(first_obs_seconds >= 0 .or. first_obs_days >= 0) then
+ first_obs_time = set_time(first_obs_seconds, first_obs_days)
+ trim_first = .true.
+else
+ trim_first = .false.
+endif
+if(last_obs_seconds >= 0 .or. last_obs_days >= 0) then
+ last_obs_time = set_time(last_obs_seconds, last_obs_days)
+ trim_last = .true.
+else
+ trim_last = .false.
+endif
+if (trim_first .and. trim_last) then
+ if (first_obs_time > last_obs_time) then
+ call error_handler(E_ERR,'merge_obs_seq', 'first time cannot be later than last time', &
+ source,revision,revdate)
+ endif
+endif
+
+! TWO PASS algorithm; open each file, trim it if requested, and count
+! the number of actual observations. then the output file can be
+! created with the correct size, and as observations are put into it
+! they'll be sorted, and unused obs will be removed.
+
+! pass 1:
+
+first_seq = -1
+do i = 1, num_input_files
+
+ if ( len_trim(filename_seq(i)) .eq. 0 .or. filename_seq(i) .eq. "" ) then
+ call error_handler(E_ERR,'merge_obs_seq', &
+ 'num_input_files and filename_seq mismatch',source,revision,revdate)
+ endif
+
+ ! count up the number of observations we are eventually going to have.
+ ! if not all the observations in a file are part of the linked list, the
+ ! output number of observations might be much smaller than the total size
+ ! in the header. it is slower, but go ahead and read in the entire sequence
+ ! and count the real number of obs - trim_seq will do the count even if
+ ! it is not trimming in time. this allows us to create an empty obs_seq
+ ! output file of exactly the right size.
+
+ call read_obs_seq_header(filename_seq(i), num_copies_in, num_qc_in, size_seq_in, &
+ max_num_obs, file_id, read_format, pre_I_format, close_the_file = .true.)
+
+ call read_obs_seq(filename_seq(i), 0, 0, 0, seq_in)
+ call trim_seq(seq_in, trim_first, first_obs_time, trim_last, last_obs_time, &
+ filename_seq(i), .true., remaining_obs_count)
+ call destroy_obs_sequence(seq_in)
+ if (remaining_obs_count == 0) then
+ process_file(i) = .false.
+ cycle
+ else
+ process_file(i) = .true.
+ size_seq_in = remaining_obs_count
+ endif
+
+ if ( first_seq < 0 ) then
+ first_seq = i
+ num_copies_out = num_copies_in
+ num_qc_out = num_qc_in
+ size_seq_out = size_seq_in
+ else
+ size_seq_out = size_seq_out + size_seq_in
+ endif
+
+enddo
+
+! no valid obs found? if the index value is still negative, we never
+! found a single usable observation in any of the input files.
+if (first_seq < 0 .or. size_seq_out == 0) then
+ msgstring = 'All input files are empty or all obs outside the first/last times'
+ call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
+endif
+
+! pass 2:
+
+! Initialize individual observation variables
+call init_obs( obs, num_copies_out, num_qc_out)
+call init_obs( new_obs, num_copies_out, num_qc_out)
+call init_obs(next_obs, num_copies_out, num_qc_out)
+call init_obs(prev_obs, num_copies_out, num_qc_out)
+
+total_num_inserted = 0
+
+! Read obs seq to be added, and insert obs from it to the output seq
+first_seq = -1
+do i = 1, num_input_files
+
+ if (.not. process_file(i)) cycle
+
+ write(msgstring,*) 'Starting to process input sequence file ', trim(filename_seq(i))
+ call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
+
+ call read_obs_seq(filename_seq(i), 0, 0, 0, seq_in)
+
+ ! If we get here, there had better be observations in this file which
+ ! are going to be used (the process_file flag would not be set otherwise).
+ call trim_seq(seq_in, trim_first, first_obs_time, trim_last, last_obs_time, &
+ filename_seq(i), .false., remaining_obs_count)
+
+ ! This would be an error at this point.
+ if(remaining_obs_count == 0) then
+ call destroy_obs_sequence(seq_in)
+ write(msgstring, *) 'Internal error trying to process file ', trim(filename_seq(i))
+ call error_handler(E_ERR,'merge_obs_seq',msgstring,source,revision,revdate)
+ endif
+
+ ! create the output sequence here based on the first input file
+ if (first_seq < 0) then
+ call init_obs_sequence(seq_out, num_copies_out, num_qc_out, size_seq_out)
+ do j=1, num_copies_out
+ meta_data = get_copy_meta_data(seq_in, j)
+ call set_copy_meta_data(seq_out, j, meta_data)
+ enddo
+ do j=1, num_qc_out
+ meta_data = get_qc_meta_data(seq_in, j)
+ call set_qc_meta_data(seq_out, j, meta_data)
+ enddo
+ first_seq = i
+ else
+ ! we have an existing output sequence already. make sure the next one
+ ! is completely compatible.
+
+ ! Compare metadata between the observation sequences.
+ ! This routine exits if they do not match.
+ call compare_metadata(seq_out, seq_in, filename_seq(first_seq), filename_seq(i))
+ endif
+
+ size_seq_out = get_num_key_range(seq_out) !current size of seq_out
+ size_seq_in = get_num_key_range(seq_in) !current size of seq_in
+
+ !-------------------------------------------------------------
+ ! Start to insert obs from sequence_in into sequence_out
+ !
+ ! NOTE: insert_obs_in_seq CHANGES the obs passed in.
+ ! Must pass a copy of incoming obs to insert_obs_in_seq.
+ !--------------------------------------------------------------
+ num_inserted = 0
+ is_there_one = get_first_obs(seq_in, obs)
+
+ if ( is_there_one ) then
+
+ new_obs = obs ! obs keeps its place in seq_in; new_obs is the copy to insert
+
+ call insert_obs_in_seq(seq_out, new_obs) ! new_obs linked list info changes
+
+ prev_obs = new_obs ! records the new position in seq_out
+ num_inserted = num_inserted + 1
+
+ call get_next_obs(seq_in, obs, next_obs, is_this_last)
+ ObsLoop : do while ( .not. is_this_last)
+
+ if (mod(num_inserted,1000) == 0) then
+ print*, 'inserted number ',num_inserted,' of ',size_seq_in
+ endif
+
+ obs = next_obs ! obs tracks our position in seq_in
+ new_obs = obs ! copy to be inserted; will be modified w/ position in seq_out
+
+ ! Since the stride through the observation sequence file is always
+ ! guaranteed to be in temporally-ascending order, we can use the
+ ! 'previous' observation as the starting point to search for the
+ ! correct insertion point. This speeds up the insert code a lot.
+
+ call insert_obs_in_seq(seq_out, new_obs, prev_obs)
+
+ prev_obs = new_obs ! update position in seq_out for next insert
+ num_inserted = num_inserted + 1
+
+ call get_next_obs(seq_in, obs, next_obs, is_this_last)
+
+ enddo ObsLoop
+
+ total_num_inserted = total_num_inserted + num_inserted
+
+ else
+ write(msgstring,*)'no first observation in ',trim(adjustl(filename_seq(i)))
+ call error_handler(E_MSG,'merge_obs_seq', msgstring, source, revision, revdate)
+ endif
+
+ print*, '-------------- Obs seq file # : ', i
+ print*, 'Number of obs in previous seq : ', size_seq_out
+ print*, 'Number of obs to be inserted : ', size_seq_in
+ print*, 'Number of obs really inserted : ', num_inserted
+ print*, '---------------------------------------------------------'
+
+ call destroy_obs_sequence(seq_in)
+
+enddo
+
+print*, 'Total number of obs inserted : ', total_num_inserted
+print*, 'Actual number of obs in the new seq file :', get_num_key_range(seq_out)
+
+call write_obs_seq(seq_out, filename_out)
+
+! Time to clean up
+
+call destroy_obs_sequence(seq_out)
+call destroy_obs( obs)
+call destroy_obs( new_obs)
+call destroy_obs(next_obs)
+call destroy_obs(prev_obs)
+
+call timestamp(source,revision,revdate,'end')
+
+contains
+
+
+!=====================================================================
+
+
+subroutine merge_obs_seq_modules_used()
+
+! Initialize modules used that require it
+call initialize_utilities('merge_obs_seq')
+call register_module(source,revision,revdate)
+call static_init_obs_sequence()
+
+end subroutine merge_obs_seq_modules_used
+
+
+ subroutine compare_metadata(seq1, seq2, fname1, fname2)
+!
+! This subroutine compares the metadata for two different observation
+! sequences and terminates the program if they are not conformable.
+! In order to be merged, the two observation sequences must have the same
+! number of qc values, the same number of copies ...
+!
+! The messages may read a bit oddly: each mismatch is reported as a
+! 'warning' message first, and then a single fatal 'error' is issued.
+!
+
+ type(obs_sequence_type), intent(IN) :: seq1, seq2
+ character(len=*), optional :: fname1, fname2
+
+integer :: num_copies1, num_qc1
+integer :: num_copies2, num_qc2
+integer :: num_copies , num_qc, i
+character(len=129) :: str1, str2
+character(len=255) :: msgstring1, msgstring2
+
+num_qc1 = get_num_qc( seq1)
+num_copies1 = get_num_copies(seq1)
+
+num_qc2 = get_num_qc( seq2)
+num_copies2 = get_num_copies(seq2)
+
+num_copies = num_copies1
+num_qc = num_qc1
+
+! get this ready in case we have to use it
+if (present(fname1) .and. present(fname2)) then
+ write(msgstring1,*)'Sequence files ', trim(fname1), ' and ', trim(fname2), &
+ ' are not compatible'
+else
+ msgstring1 = 'Sequence files cannot be merged because they are not compatible'
+endif
+
+if ( num_copies1 /= num_copies2 ) then
+ write(msgstring2,*)'Different numbers of data copies found: ', &
+ num_copies1, ' vs ', num_copies2
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ num_copies = -1
+endif
+if ( num_qc1 /= num_qc2 ) then
+ write(msgstring2,*)'Different numbers of QC values found: ', &
+ num_qc1, ' vs ', num_qc2
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ num_qc = -1
+endif
+if ( num_copies < 0 .or. num_qc < 0 ) then
+ call error_handler(E_ERR, 'merge_obs_seq', msgstring1, source, revision, revdate)
+endif
+
+MetaDataLoop : do i=1, num_copies
+ str1 = trim(adjustl(get_copy_meta_data(seq1,i)))
+ str2 = trim(adjustl(get_copy_meta_data(seq2,i)))
+
+ if( str1 == str2 ) then
+ write(msgstring2,*)'metadata ',trim(adjustl(str1)), ' in both.'
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ else
+ write(msgstring2,*)'metadata value mismatch. seq1: ', trim(adjustl(str1))
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ write(msgstring2,*)'metadata value mismatch. seq2: ', trim(adjustl(str2))
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ call error_handler(E_ERR, 'merge_obs_seq', msgstring1, source, revision, revdate)
+ endif
+enddo MetaDataLoop
+
+QCMetaData : do i=1, num_qc
+ str1 = trim(adjustl(get_qc_meta_data(seq1,i)))
+ str2 = trim(adjustl(get_qc_meta_data(seq2,i)))
+
+ if( str1 == str2 ) then
+ write(msgstring2,*)'qc metadata ', trim(adjustl(str1)), ' in both.'
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ else
+ write(msgstring2,*)'qc metadata value mismatch. seq1: ', trim(adjustl(str1))
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ write(msgstring2,*)'qc metadata value mismatch. seq2: ', trim(adjustl(str2))
+ call error_handler(E_MSG, 'merge_obs_seq', msgstring2, source, revision, revdate)
+ call error_handler(E_ERR, 'merge_obs_seq', msgstring1, source, revision, revdate)
+ endif
+enddo QCMetaData
+
+end subroutine compare_metadata
+
+! pass in an already opened sequence and a start/end time. this routine
+! really trims the observations out of the sequence, and returns a count
+! of how many remain.
+subroutine trim_seq(seq, trim_first, first_time, trim_last, last_time, seqfilename, &
+ print_msg, remaining_obs_count)
+ type(obs_sequence_type), intent(inout) :: seq
+ logical, intent(in) :: trim_first, trim_last
+ type(time_type), intent(in) :: first_time, last_time
+ character(len = *), intent(in) :: seqfilename
+ logical, intent(in) :: print_msg
+ integer, intent(out) :: remaining_obs_count
+
+ ! Need to find first obs with appropriate time, delete all earlier ones
+ if(trim_first) then
+ call delete_seq_head(first_time, seq, all_gone)
+ if(all_gone) then
+ if (print_msg) then
+ msgstring = 'Skipping: all obs in ' // trim(seqfilename) // &
+ ' are before first_obs_days:first_obs_seconds'
+ call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
+ endif
+ remaining_obs_count = 0
+ return
+ endif
+ endif
+
+ ! Also get rid of observations past the last_obs_time if requested
+ if(trim_last) then
+ call delete_seq_tail(last_time, seq, all_gone)
+ if(all_gone) then
+ if (print_msg) then
+ msgstring = 'Skipping: all obs in ' // trim(seqfilename) // &
+ ' are after last_obs_days:last_obs_seconds'
+ call error_handler(E_MSG,'merge_obs_seq',msgstring,source,revision,revdate)
+ endif
+ remaining_obs_count = 0
+ return
+ endif
+ endif
+
+ remaining_obs_count = get_num_key_range(seq)
+
+end subroutine trim_seq
+
+end program merge_obs_seq
Property changes on: DART/branches/nancy_work/obs_sequence/merge_obs_seq.f90
___________________________________________________________________
Name: svn:keywords
+ "Date Rev Author URL Id"