;----------------------------------------------------------------------------- ; ; bil-to-nc.prism.daily.ncl -- Convert PRISM daily BIL format (flat binary ; day files) to concatenated Netcdf file. ; ; 2015-oct-01 bil-to-nc.prism.daily.ncl: ; Original version for precip (ppt). ; By Dave Allured, NOAA/OAR/ESRL/PSD/CIRES. ; Read BIL (flat binary) day files, output Netcdf-4 classic. ; Unzip daily archives on the fly. ; Adapted from era-update.ncl, month_aves.ncl, etc. ; 2015-dec-16 Reverse latitudes on the fly, output south to north. ; ; 2016-mar-17 Add support for other PRISM daily vars: tmax, tmean, tmin. ; Add var table. ; 2016-aug-02 Add support for new PRISM daily var: tdmean. ; ; 2019-nov-05 Update config strings for source version D2 data updates. ; program_id = "bil-to-nc.prism.daily.ncl version 2019-nov-05" ; ;------------------------------------------------------------------------------ ; ; Notes: ; ; This program concatenates the entire time series of PRISM daily ; source files in one run, and makes a single concatenated Netcdf ; file. Append and update modes are not yet supported. ; ; This version auto-configures for all available dates in the ; local PRISM source archive. However, the archive must be ; consistent. Any missing dates in the middle will cause an ; intentional fatal error. ; ; For PRISM daily, the source archive may contain multiple file ; versions for some dates. The stability version for each day is ; selected in the order of preference in "stability_list" below. ; ; This version uses the CDL template method and ncgen for fine ; control of Netcdf extended attributes. ; ;----------------------------------------------------------------------------- ; ; Usage: ; ; 1. Go to the desired output directory. ; ; 2. Ensure that the input file template below is consistent ; with a directory or sym link to access the source data files. ; ; 3. Ensure there is no previous output file by the same name. ; ; 4. Run this program with NCL. Specify the variable name on ; the command line. Capture a log file for debugging, if needed. ; ; ncl bil-to-nc.prism.daily.ncl var=\"VARNAME\" ; ; VARNAME may be ppt, precip, tdmean, tmax, tmean, or tmin. ; ppt and precip both select the same precip variable ; ; This program will find and process all avaliable dates in the ; source archive for the selected variable. ; ;----------------------------------------------------------------------------- load "$NCARG_ROOT/lib/ncarg/nclscripts/contrib/ut_string.ncl" begin print ("") print (systemfunc ("date") + ": Start.") print ("Program = " + program_id) ;---------------------------------------------------- ; Run parameters. ;---------------------------------------------------- dquote = str_get_dq () ; program parameters newline = str_get_nl () nl = newline ; Templates for PRISM input paths and files. ; Infile_template will be used for BOTH the archive and data file names. indir_template = "download/INVAR/YYYY" infile_template = "PRISM_INVAR_STABILITY_XVERSION_YYYYMMDD_bil.EXT" infile_wildcard = "PRISM_INVAR_*.zip" ext_archive = "zip" ext_data = "bil" date_field_num = 5 ; date is in 5th position between underscores ; List stability versions in INCREASING order of preference. stability_list = "early provisional stable" ; INCREASING preference order! ; Output parameters. outfile_template = "OUTVAR.prism.daily.1981-present.nc" ; Descriptive metadata. Ref. PRISM metadata files and website. translated_by = "Dave Allured, NOAA/OAR/ESRL/PSD/CIRES" source_institution = "PRISM Climate Group at Oregon State University" source_documentation = "See PRISM website for source documentation," + nl \ + "data sources, citation, acknowledgements," \ + " and terms of use." terms_of_use = "http://www.prism.oregonstate.edu/documents/" \ + "PRISM_terms_of_use.pdf" source_website = "http://www.prism.oregonstate.edu" source_download_url = "ftp://prism.nacse.org/daily/INVAR/" conversion = "Convert from PRISM BIL (flat binary) source files," \ + newline + " to Netcdf-4 classic." + newline \ + "Reverse latitudes, output south to north." ; PRISM header file parameters. ; Copied near verbatim from PRISM_ppt_stable_4kmD2_20120101_bil.hdr ; (captured 2015-sep-22). ; Assume these parameters are identical for all variables. ; Note "d" added to coordinate parameters, to preserve full precision. ; BYTEORDER = I ; LAYOUT = BIL NROWS = 621 NCOLS = 1405 ; NBANDS = 1 ; NBITS = 32 ; BANDROWBYTES = 5620 ; TOTALROWBYTES = 5620 PIXELTYPE = "FLOAT" ULXMAP = -125d ULYMAP = 49.9166666666687d XDIM = 0.04166666666667d YDIM = 0.04166666666667d NODATA = -9999 ; Copy to conventional var names. nlats = NROWS nlons = NCOLS var_type = str_lower (PIXELTYPE) vmiss = totype (NODATA, var_type) ; Netcdf output parameters. out_format = "NetCDF4Classic" compression_level = 5 ; level 5 found to be good tradeoff time_chunk_size = 8000 ; chunk size for time coordinate var time_units = "days since 1980-1-1 0:0:0" calendar = "gregorian" time_type = "float" time_step = 1 ; daily increment for "days since" time_step_str = "daily" ;---------------------------------------------------- ; Table of supported variables. ;---------------------------------------------------- invar = var ; change usage name of command line parameter delete (var) ; prevent abuse if (invar .eq. "precip") then ; accomodate both synonyms invar = "ppt" end if ; Table using if blocks, to handle long strings. if (invar .eq. "ppt") then outvar = "precip" long_name = "Precipitation" var_units = "millimeters" dataset_title = "PRISM U.S. Daily Total Precipitation" dataset_version = "AN81/ppt/D2" filename_version = "4kmD2" end if if (invar .eq. "tdmean") then outvar = invar long_name = "Mean Dewpoint Temperature" var_units = "degrees C" dataset_title = "PRISM U.S. Daily Mean Dewpoint Temperature" dataset_version = "AN81/tdmean/D2" filename_version = "4kmD2" end if if (invar .eq. "tmax") then outvar = invar long_name = "Maximum Temperature" var_units = "degrees C" dataset_title = "PRISM U.S. Daily Maximum Temperature" dataset_version = "AN81/tmax/D2" filename_version = "4kmD2" end if if (invar .eq. "tmean") then outvar = invar long_name = "Mean Temperature" var_units = "degrees C" dataset_title = "PRISM U.S. Daily Mean Temperature" dataset_version = "AN81/tmean/D2" filename_version = "4kmD2" end if if (invar .eq. "tmin") then outvar = invar long_name = "Minimum Temperature" var_units = "degrees C" dataset_title = "PRISM U.S. Daily Minimum Temperature" dataset_version = "AN81/tmin/D2" filename_version = "4kmD2" end if ; Diagnostics. if (.not. isdefined ("outvar")) then print ("*** Abort. Unknown var name on command line.") print ("*** Var name = '" + var + "'.") status_exit (1) end if print ("") print ("Input var name = " + invar) print ("Output var name = " + outvar) ;---------------------------------------------------- ; Determine first and last dates to concatenate. ;---------------------------------------------------- print ("") print ("Find first and last dates to concatenate.") ; It is most efficient to use Unix commands to extract the first ; and last dates in the source archives. ; Caution, some file name parsing is hard coded here. The date string ; is extracted from a particular field separated by underscores. ; First resolve the related search templates. search_dirs = str_sub_str (indir_template, "INVAR", invar) search_dirs = str_sub_str (search_dirs, "YYYY", "????") infile_wildcard2 = str_sub_str (infile_wildcard, "INVAR", invar) print (" Input path = " + indir_template + " (template)") print (" Input path = " + search_dirs + " (wildcard)") print (" Filename wildcard = " + infile_wildcard2) ; Now run the actual directory scans. Get all file names for ; this variable, then extract just the dates. ; It is essential to include the sort command, because find ; returns unsorted lists. find_cmd = "find " + search_dirs + " -name '" + infile_wildcard2 + "'" parse_dates = "cut -f" + date_field_num + " -d_ | sort " all_dates = systemfunc (find_cmd + " | " + parse_dates + " | sort") ; YYYYMMDD nfiles = num (.not. (ismissing (all_dates))) print (" Number of files in this archive = " + nfiles) ; Check for expected valid file names. invalid = (strlen (all_dates) .ne. 8) if (any (invalid)) then fnum = 1 + min (ind (invalid)) print ("*** Abort. Invalid date strings in source file names.") print ("*** First invalid date string = '" + all_dates(fnum-1) + "'") print ("*** Position in list of files = " + fnum) print ("*** Check files and path specs, and archive directories.") status_exit (1) end if ; Get the very first and last dates. first_date = all_dates(0) ; YYYYMMDD last_date = all_dates(nfiles-1) ; YYYYMMDD first_last = (/ first_date, last_date /) ; YYYYMMDD print (" First and last dates in this archive = " + first_date \ + ", " + last_date) ;---------------------------------------------------- ; Create an empty output file from a CDL template. ;---------------------------------------------------- ; The CDL template is used to create simple storage modes that ; are not yet supported by NCL. This avoids redundant or ; confusing storage attributes. ; ; Only the core file structure is included in this template. ; Detail attributes and data will be added later, after ncgen. ; ; This is a CDL format text string for input to ncgen. The syntax ; is the same as a regular CDL text file, with these variations: ; ; 1. Use single quotes in place of double quotes, due to NCL ; quoting syntax. The single quotes will be converted back to ; double quotes, before CDL input to ncgen. ; ; 2. Real single quotes in the final CDL are prohibited. ; ; 3. As in regular CDL files, all external whitespace is ; equivalent. Therefore: ; ; 3a. Newlines are gratuitous, and may be omitted. ; ; 3b. Convert leading tabs to spaces, for readability. ; ; 4. The template must be a single long string, with all lines ; concatenated. ; ; 5. Omit all CDL comments. ; ; 6. Use special symbols for dynamic substitution. template = \ (/ "netcdf template { " \ + "dimensions: " \ + " time = UNLIMITED ; " \ + " lat = NLATS ; " \ + " lon = NLONS ; " \ + "variables: " \ + " float time(time) ; " \ + " time:_Storage = 'chunked' ; " \ + " time:_ChunkSizes = TIME_CHUNKSIZE ; " \ + " float lat(lat) ; " \ + " lat:_Storage = 'contiguous' ; " \ + " float lon(lon) ; " \ + " lon:_Storage = 'contiguous' ; " \ + " VTYPE OUTVAR(time, lat, lon) ; " \ + " OUTVAR:_FillValue = FILL_VALUE ; " \ + " OUTVAR:_Storage = 'chunked' ; " \ + " OUTVAR:_ChunkSizes = 1, NLATS, NLONS ; " \ + " OUTVAR:_DeflateLevel = DEFLATE_LEVEL ; " \ + " " \ + ":_Format = 'netCDF-4 classic model' ; " \ + "} " /) ; Convert the single quotes back to double quotes. template = str_sub_str (template, "'", dquote) ; Replace special symbols with actual values. fill_value_str = tostring (vmiss) template = str_sub_str (template, "NLATS", nlats+"") template = str_sub_str (template, "NLONS", nlons+"") template = str_sub_str (template, "OUTVAR", outvar) template = str_sub_str (template, "VTYPE", var_type) template = str_sub_str (template, "FILL_VALUE", fill_value_str) template = str_sub_str (template, "DEFLATE_LEVEL", compression_level+"") template = str_sub_str (template, "TIME_CHUNKSIZE", time_chunk_size+"") ; Make the output file name from its template. outfile = str_sub_str (outfile_template, "OUTVAR", outvar) ; Create an empty file with ncgen and the CDL template. print ("") print ("Create empty output file with ncgen and CDL template.") print (" Output file = " + outfile) ; Caution: Ncgen will stomp any previous file. ; This version does not provide overwrite protection. system ("echo '" + template + "' | ncgen -o " + outfile) ;---------------------------------------------------- ; Write metadata. ;---------------------------------------------------- print ("Write metadata.") ; Open the newly created file in write mode, for updating. out = addfile (outfile, "w") ; Add global attributes. clock_time = systemfunc ("date") url2 = str_sub_str (source_download_url, "INVAR", invar) out@Conventions = "CF-1.5" out@title = dataset_title out@dataset_version = dataset_version out@history = clock_time + ": " + get_script_name() \ + ":" + newline + conversion out@converter = program_id out@translated_by = translated_by out@source_institution = source_institution out@source_documentation = source_documentation out@terms_of_use = terms_of_use out@source_website = source_website out@source_download_url = url2 out@output_file_name = outfile ; Write time coordinate attributes. ; The actual time coordinates will be written incrementally. out->time@long_name = "time" out->time@units = time_units out->time@calendar = calendar out->time@time_step = time_step_str ; Write data variable attributes. ; Note, _FillValue is fundamental in Netcdf-4, was written by ncgen above. out->$outvar$@long_name = long_name out->$outvar$@units = var_units out->$outvar$@missing_value = vmiss ; preferred missing value att. out->$outvar$@dataset = dataset_title out->$outvar$@dataset_version = dataset_version ;---------------------------------------------------- ; Write lat and lon coordinates. ;---------------------------------------------------- print ("Write lat and lon coordinates.") ; Generate coordinate arrays from PRISM header parameters. ; Coordinates and data will be written in standard orientations ; South to North, and West to East. ; Accuracy controls: ; ; 1. Use only the lat/lon parameters from a PRISM header file (see above). ; These are much more precise than the parameters in other PRISM sources. ; ; 2. Use reciprocal multiplier to support exact fractional increments. ; ; 3. NCL's fspan is not guaranteed precision. Use integer step method ; with ispan, for guaranteed precision. ; Following is valid only when the increments are exact fractions of ; one degree. Custom for PRISM. opt_integer = 3 ; get exact integer divisors lats_per_degree = round (1.0 / YDIM, opt_integer) lons_per_degree = round (1.0 / XDIM, opt_integer) ; The reference point in PRISM is "UL", the upper left grid corner, ; which is the Northmost and Westmost grid cell. The reference point ; and the generated grid points are all grid centers. See PRISM docs. ; ; Latitudes are generated BACKWARD from ULYMAP at the reference corner, ; for the standard direction South to North. ; ; Longitudes are generated FORWARD from ULXMAP at the reference corner, ; for the standard direction West to East. dlats = ULYMAP - todouble (ispan (nlats-1, 0, 1)) / lats_per_degree dlons = ULXMAP + todouble (ispan (0, nlons-1, 1)) / lons_per_degree ; After precise calculation, convert doubles to floats for final output. lats = tofloat (dlats) lons = tofloat (dlons) ; Write coordinate variables to output file. ; Empty coordinate variables were previously created by the template, ; including named dimensions. out->lat = (/ lats /) out->lon = (/ lons /) out->lat@long_name = "latitude" ; write attributes out->lon@long_name = "longitude" out->lat@units = "degrees_north" out->lon@units = "degrees_east" out->lat@actual_range = (/ min (lats), max (lats) /) out->lon@actual_range = (/ min (lons), max (lons) /) print (" Min, max latitudes = " + min (lats) + ", " + max (lats)) print (" Min, max longitudes = " + min (lons) + ", " + max (lons)) print (" Nlats, nlons = " + nlats + ", " + nlons) ;------------------------------------------------- ; Generate CF time coordinates. ;------------------------------------------------- print ("") print ("Generate CF time coordinates.") print (" First and last dates = " + first_date + ", " + last_date) ; Convert first and last YYYYMMDD date strings to integers. years = stringtoint (str_get_cols (first_last, 0, 3)) months = stringtoint (str_get_cols (first_last, 4, 5)) days = stringtoint (str_get_cols (first_last, 6, 7)) ; Convert first and last dates to CF integer time coordinates. zeros = (/ 0, 0 /) ; dates plus time 00:00:00 hours = zeros minutes = zeros seconds = zeros opt_cal = 0 opt_cal@calendar = calendar time_range = ut_inv_calendar (years, months, days, hours, minutes, \ seconds, time_units, opt_cal) time1 = toint (time_range(0)) ; first time coordinate time2 = toint (time_range(1)) ; last time coordinate ; Generate complete CF time coordinate array in memory. ; Use robust integer math before converting to float. times = totype (ispan (time1, time2, time_step), time_type) ntimes = dimsizes (times) ; Finally, convert all times back to YYYYMMDD format, for generating ; input file names. times@units = time_units times@calendar = calendar yyyymmdd = ut_string (times, "%Y%N%D") yyyy = str_get_cols (yyyymmdd, 0, 3) ; also need substrings mmdd = str_get_cols (yyyymmdd, 4, 7) ; for paths, etc. print (" Number of days to concatenate = " + ntimes) ;------------------------------------------------------------- ; Set up for main loop. ;------------------------------------------------------------- ; Partially resolve the input templates. Do as much as possible ; outside the loop. NCL has problems with too many string assignments. ; Template examples (actual defs above): ; indir_template = "download/INVAR/YYYY" ; infile_template = "PRISM_INVAR_STABILITY_XVERSION_YYYYMMDD_bil.EXT" indir_template2 = str_sub_str (indir_template, "INVAR", invar) infile_template2 = str_sub_str (infile_template, "INVAR", invar) infile_template2 = str_sub_str (infile_template2, "XVERSION",filename_version) ; Prepare stability version strings. stability_strs = str_split (stability_list, " ") ; get individual strings nversions = dimsizes (stability_strs) earliest = new (nversions, string) earliest(:) = "None" ; default labels for source file details ;------------------------------------------------------------- ; Main loop. Read input files, copy grids to output file. ;------------------------------------------------------------- print ("") print (systemfunc ("date") + ": Start main loop.") print (" Read daily input files, append to output file.") ; Main loop over input files. do ti = 0, ntimes-1 test1 = (ti .ne. 0) .and. (mmdd(ti) .eq. "0101") ; progress display test2 = (ti .eq. ntimes-1) if (test1 .or. test2) then ; progress display print (systemfunc ("date") + " " + yyyymmdd(ti)) end if ; Find the preferred input file for the current date. ; Search for the different stability versions. ; Template examples, partially resolved, see loop setup above: ; indir_template2 = "download/invar/YYYY" ; infile_template2 = "PRISM_invar_STABILITY_xversion_YYYYMMDD_bil.EXT" base1 = str_sub_str (infile_template2, "YYYYMMDD", yyyymmdd(ti)) do si = nversions-1, 0, 1 ; preference is last (most stable) to first base2 = str_sub_str (base1, "STABILITY", stability_strs(si)) zipfile = indir_template2 + "/" \ + str_sub_str (base2, "EXT", ext_archive) zipfile = str_sub_str (zipfile, "YYYY", yyyy(ti)) ; this sub is needed ; to resolve the path found = fileexists (zipfile) ; if version file exists, exit loop if (found) then si_save = si break end if end do ; Check for archive file not found. if (.not. found) then print ("*** Missing input file. Abort.") print ("*** Date = " + yyyymmdd(ti)) print ("*** File = " + zipfile) print ("*** Did not find any of the " + nversions \ + " stability version file names.") status_exit (1) end if ; Diagnostics. infile = str_sub_str (base2, "EXT", ext_data) ; data file inside archive ; for exact filename match if (ti .eq. 0) then print (" First zip archive = " + zipfile) print (" First data file = " + infile) end if if (ti .eq. ntimes-1) then print (" Last zip archive = " + zipfile) print (" Last data file = " + infile) end if ; Save name of earliest file for each stability version. if (earliest(si_save) .eq. "None") then earliest(si_save) = infile ; the .bil file name is saved end if ; PRISM: Extract only the .bil data file (flat binary) ; from the zip archive for the current date. ; Unzip into the current directory, as a temp file. ; Use Bourne shell syntax with systemfunc. ; Option j = No path, extract to current directory. ; Option o = Always overwrite silently. ; Option q = Quiet mode, no normal messages. ; (Error messages will show directly on console, via stderr.) result = systemfunc ("unzip -joq " + zipfile + " " + infile \ + " ; echo Unzip exit status $?") ; Check unzip exit status. if (dimsizes (result) .ne. 1 .or. result .ne. "Unzip exit status 0") then print ("") print ("*** Date = " + yyyymmdd(ti)) print ("*** Current zip archive = " + zipfile) print ("*** Expected data file = " + infile) print ("*** " + result) print ("*** Error from unzip. Abort.") status_exit (1) end if ; Check for expected data file from archive. if (.not. fileexists (infile)) then print ("") print ("*** Date = " + yyyymmdd(ti)) print ("*** Current zip archive = " + zipfile) print ("*** Expected data file = " + infile) print ("*** " + result) print ("*** Expected data file is missing from current directory. Abort.") status_exit (1) end if ; Save history details. ; Behold horrid kludge to get full dates with platform independence! Ouch! if ( (ti .eq. 0) .or. (ti .eq. ntimes-1) ) then ls_bsd = "ls -go -T " + infile ; Mac OS etc. ls_gnu = "ls -go --time-style=long-iso " + infile ; Redhat Linux etc. details = systemfunc ( "( " + ls_bsd + " ; " + ls_gnu + " )" \ + " 2> /dev/null" \ ; omit error messages! + " | head -1" \ ; get only the first response! + " | tr -s ' ' | cut -f3- -d' ' " ) ; get only the useful fields! if (ti .eq. 0) then first_file_details = details else last_file_details = details end if end if ; Read binary .bil input file, single grid for current date. rec_num = 0 ; special mode for diagnostic; unknown_size = -1 ; read entire file as 1-D array data_1d := fbindirread (infile, rec_num, unknown_size, var_type) ndata = dimsizes (data_1d) expected = nlats * nlons if (ndata .ne. expected) then print ("") print ("*** Abort, invalid binary data file size.") print ("*** Date = " + yyyymmdd(ti)) print ("*** Current zip archive = " + zipfile) print ("*** Binary data file = " + infile) print ("*** " + result) print ("*** Input data type = " + var_type) print ("*** Expected nlats, nlons = " + nlats + ", " + nlons) print ("*** Expected data count = " + expected) print ("*** Actual data count = " + ndata) status_exit (1) end if ; Delete temp file with unique name, immediately after read and ; validation. No further need. system ("rm " + infile) ; Reshape input data to 2-D grid. ; Input order is rows North to South, each row containing grid points ; West to East. The onedtond function makes a (Y,X) grid following ; this ordering. dims_2d = (/ nlats, nlons /) grid = onedtond (data_1d, dims_2d) ; (Y,X) <- (Y*X) grid@_FillValue = vmiss ; need _FillValue for correct statistics ; Reverse latitude dimension to South to North, and write grid ; to output file, all in one operation. out->$outvar$(ti,:,:) = (/ grid(::-1,:) /) ; Also write the time coordinate for the current time step. ; Incremental write improves external performance monitoring, ; at the cost of slightly decreased efficiency. out->time(ti) = (/ times(ti) /) ; Accumulate statistics. vmin = min (grid) ; grid min, max vmax = max (grid) if (ti .eq. 0) then gmin = vmin ; init accumulators gmax = vmax else gmin = min ((/ gmin, vmin /)) ; accumulated min, max gmax = max ((/ gmax, vmax /)) end if end do ;------------------------------------------------------------- ; End main loop. Write summary attributes. ;------------------------------------------------------------- print ("") out->$outvar$@actual_range = (/ gmin, gmax /) print ("Min, max all " + outvar + " = " + gmin + ", " + gmax +" "+ var_units) out->$outvar$&time@actual_range = (/ times(0), times(ntimes-1) /) ; Record stability version details. do si = nversions-1, 0, 1 ; order is last (most stable) to first att_name = "first_" + stability_strs(si) + "_file" out@$att_name$ = earliest(si) end do ; Write source file details to global attributes. out@first_file_details = first_file_details out@last__file_details = last_file_details print (systemfunc ("date") + ": File complete.") print (" Output file = " + outfile) print ("") end exit