;--------------------------------------------------------------------------- ; ; NCL demo program to spatially join Netcdf Daymet tiles. ; ; 2012-oct-09 By Dave Allured, NOAA/PSD/CIRES Climate Analysis Branch. ; Original demo version. Joins two tiles into one output file. ; Infill of missing 2-D coordinates is not yet provided. ; 2012-oct-18 Specify file1, file2, and "join" on the command line, ; for repeat manual operation. ; Bug fix for concatenating the tileid global attributes. ; Bug fix for 2-D coordinate overlay. Must use masking to ; preserve good coordinates in no-data areas. ; 2012-oct-27 Add consistency check and trap for no common grid point. ; ; 2012-nov-23 Upgrade to join many tiles in one program run. ; Get input tile numbers from list file. ; Get complete 2-D coordinate grids from Daymet supplemental file. ; Common grid point between two tiles is no longer needed. ; Add diagnostics for mismatched 1-D coordinates. ; (2-D coordinates are not checked.) ; Change to 64-bit offset format to support large output files ; exceeding 2 gbytes. ; 2012-nov-26 Check alignment of 2-D coordinates between supplemental file ; and data files, report statistics only. ; Add actual_range attributes. ; 2012-dec-04 Use the official Daymet supplemental coordinate file. ; Add warning for excessive misalignment in 2-D coordinates. ; Make time dimension unlimited to support NCO concatenation. ; 2012-dec-26 Minor update. Add diagnostics for memory overflow. ; prog_id = "join.daymet.1226.ncl" ; ; Notes: ; ; This version can join many tiles in a single program run. ; Input tiles may be either spatially overlapping, or disjoint. ; The only spatial constraint is that all input tiles must be ; fully contained within the spatial domain of the supplemental ; coordinate file. ; ; This version can also join previous output files from the same ; program. In this way, smaller joins can be assembled into ; larger merged files, if needed. As with original tiles, the ; input files may be either overlapping or disjoint. Some minor ; programming changes for managing the input file names will be ; needed. ; ; This version joins 3-D input variables with time as the other ; dimension. The time dimension and time coordinates must match ; exactly between input files. With appropriate changes, the ; general method can be adapted to variables of other ranks. ; ; Also, the one-dimensional X and Y coordinates of each input ; file must exactly match a subset of the supplemental ; coordinates. The 2-D coordinates lat and lon should also ; match to within reasonable roundoff error, but this match ; does not need to be perfect. ; ; Statistics on the quality of alignment of the 2-D coordinates ; are printed in the middle of the program run. Please check ; these statistics for reasonable alignment. A difference of ; 1.0e-08 or less is reasonable roundoff error. ; ; For making very large merged files, it is better to first ; join Daymet files spatially for one year at a time, then ; concatenate along the time dimension last. Use capable ; software such as ncrcat from NCO utilities. You may run into ; memory size problems with NCL, if you concatenate over time ; before attempting to join spatially. ; ; Usage: ; ; 1. Download and unpack Daymet tiles for the area of interest, ; and for the desired years, from the Daymet website: ; ; http://daymet.ornl.gov/ ; ; 2. Download and unpack the Daymet supplemental coordinate file. ; Currently this is located on the Daymet Community Tools page. ; ; 3. Make a text file with tile numbers to be included. This ; can be done by cropping the original Daymet directory names: ; ; cd original_data ; ls -1d *1980 | cut -c 1-5 > tiles.list ; ; 4. Adjust the input and output parameters at the start of the ; program below, as needed. ; ; 5. Run this script with no command arguments: ; ; ncl join.daymet.ncl ; ; For automatic operation, you can make parameters "var" and/or ; "year" into command line arguments, then loop over one or both ; within a driver script. ; ;--------------------------------------------------------------------------- load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; Set run parameters here. begin var = "prcp" year = 1980 coord_file = "Daymet_Coordinates.nc" ; supplemental coordinate file list_file = "tiles.wyoming.list" ; filenames for demonstration scenario indir = "data" ; input directory or symbolic link outfile = var + ".wyoming." + year + ".nc" header_space = 5000 ; pad size to optimize Netcdf-3 output limit_2d_offset = 1.0e-08 ; offset limit for 2-D coordinates ; to trigger alignment warning ;----------------------------------------------------------- ; 1. Read tile numbers from list file. ;----------------------------------------------------------- print ("") print ("Read list file: " + list_file) read_all = -1 tiles = asciiread (list_file, read_all, "integer") nfiles = dimsizes (tiles) print (" Number of input tiles = " + nfiles) ;------------------------------------------------------------- ; 2. Read master coordinate arrays from supplemental file. ;------------------------------------------------------------- print ("") print ("Read coordinate file: " + coord_file) cf = addfile (coord_file, "r") xmain = cf->x ; read master 1-D coordinates ymain = cf->y lat_main = cf->lat ; read master 2-D coordinates lon_main = cf->lon nxc = dimsizes (xmain) nyc = dimsizes (ymain) print (" Coordinate grid size lat, lon = " + nyc + " x " + nxc) nxmiss = num (ismissing (lon_main) .or. (lon_main .eq. 0) ) nymiss = num (ismissing (lat_main) .or. (lat_main .eq. 0) ) ngrid = nxc * nyc pctx = sprintf (" (%0.1f%%)", (100.0 * nxmiss) / ngrid) pcty = sprintf (" (%0.1f%%)", (100.0 * nymiss) / ngrid) print (" Number of missing latitudes = " + nymiss + pcty) print (" Number of missing longitudes = " + nxmiss + pctx) ;------------------------------------------------------------------------- ; 3. Scan through all input files, and check alignment of coordinates. ;------------------------------------------------------------------------- print ("") print ("Scan through input files to check coordinates.") infiles = tiles(:) + "_" + year + "/" + var + ".nc" infile2 = indir + "/" + infiles(:) isub = new ((/ nxc, 2 /), integer) ; arrays to save tile subscripts jsub = new ((/ nyc, 2 /), integer) lat_max_offset = new (nfiles, double) lon_max_offset = new (nfiles, double) lat_count_exact = new (nfiles, double) lon_count_exact = new (nfiles, double) grid_count = new (nfiles, double) do fi = 0, nfiles-1 f = addfile (infile2(fi), "r") ; open next input file x1 = f->$var$&x ; read 1-D coordinate vectors y1 = f->$var$&y ; Check alignment of 1-D coordinates, X and Y. i1 = ind (x1(0) .eq. xmain) ; find lower left grid point within j1 = ind (y1(0) .eq. ymain) ; the master coordinate grid i2 = i1 + dimsizes (x1) - 1 ; find upper right subscripts j2 = j1 + dimsizes (y1) - 1 grid_count(fi) = dimsizes (x1) * dimsizes (y1) if (ismissing (i1) .or. (i2 .ge. nxc)) then print ("*** X coordinates not fully contained within coordinate grid,") print ("*** join fails. Abort.") print ("*** Input file = " + infile2(fi)) status_exit (1) end if if (ismissing (j1) .or. (j2 .ge. nyc)) then print ("*** Y coordinates not fully contained within coordinate grid,") print ("*** join fails. Abort.") print ("*** Input file = " + infile2(fi)) status_exit (1) end if print (" " + infiles(fi) + " i1:i2, j1:j2 = " + i1 + ":" + i2 + ", " \ + j1 + ":" + j2) ; progress display if (any (x1 .ne. xmain(i1:i2))) then print ("*** Mismatch in X coordinates, join fails. Abort.") print ("*** Input file = " + infile2(fi)) status_exit (1) end if if (any (y1 .ne. ymain(j1:j2))) then print ("*** Mismatch in Y coordinates, join fails. Abort.") print ("*** Input file = " + infile2(fi)) status_exit (1) end if isub(fi,:) = (/ i1, i2 /) ; save subscripts of current tile jsub(fi,:) = (/ j1, j2 /) ; within master grid ; Check alignment of 2-D coordinates, lat and lon. difs = abs (f->lat - lat_main(j1:j2, i1:i2)) lat_max_offset(fi) = max (difs) lat_count_exact(fi) = num (difs .eq. 0) difs = abs (f->lon - lon_main(j1:j2, i1:i2)) lon_max_offset(fi) = max (difs) lon_count_exact(fi) = num (difs .eq. 0) delete (difs) ; Also check for matched time coordinates. time = f->$var$&time ; read time coords for current file nt = dimsizes (time) time_dim_name = time!0 ; time dimension name, for later if (fi .eq. 0) then sample = f->$var$(0,0,0) ; capture attributes from first file time1 = time ; save first file time coordinates ntimes = nt end if if (nt .ne. ntimes .or. any (time .ne. time1)) then print ("*** Mismatch in time coordinates, join fails. Abort.") print ("*** Input file = " + infile2(fi)) status_exit (1) end if delete (time) delete (y1) delete (x1) end do imin = min (isub(:,0)) ; compute min and max tile subscripts imax = max (isub(:,1)) ; within master grid jmin = min (jsub(:,0)) jmax = max (jsub(:,1)) ; Print summary information for coordinates. print (" Full subset domain within master grid = " + imin + ":" + imax \ + ", " + jmin + ":" + jmax) lat_min = min (lat_main(jmin:jmax, imin:imax)) ; actual ranges for lat_max = max (lat_main(jmin:jmax, imin:imax)) ; output 2-D coordinates lon_min = min (lon_main(jmin:jmax, imin:imax)) lon_max = max (lon_main(jmin:jmax, imin:imax)) tile_min = min (tiles) tile_max = max (tiles) print (" Min, max included latitudes = " + lat_min + ", " + lat_max) print (" Min, max included longitudes = " + lon_min + ", " + lon_max) print (" Min, max included tile numbers = " + tile_min + ", " + tile_max) ;----------------------------------------------------------- ; 4. Check alignment of 2-D coordinates. ;----------------------------------------------------------- print ("") print ("Compare 2-D coordinates between master coordinate file") print ("and all input data files.") sum_lat = sum (lat_count_exact) sum_lon = sum (lon_count_exact) pct_lat = sprintf (" (%0.1f%%)", (100.0 * sum_lat) / sum (grid_count)) pct_lon = sprintf (" (%0.1f%%)", (100.0 * sum_lon) / sum (grid_count)) print (" Count of exact matches in latitudes = " + sum_lat + pct_lat) print (" Count of exact matches in longitudes = " + sum_lon + pct_lon) lat_max_max = max (lat_max_offset) lon_max_max = max (lon_max_offset) print (" Largest difference in latitudes = " + lat_max_max + " degrees") print (" Largest difference in longitudes = " + lon_max_max + " degrees") if (max ((/ lat_max_max, lon_max_max /)) .gt. limit_2d_offset) then print ("") print ("*** WARNING: Unexpected differences in 2-D coordinates") print ("*** between tiles and reference coordinate file.") print ("*** Check for possible map and data alignment problems.") end if ;----------------------------------------------------------- ; 5. Create super array. ;----------------------------------------------------------- print ("") print ("Create super array.") nxout = imax - imin + 1 ; output dimensions nyout = jmax - jmin + 1 out_dims = (/ ntimes, nyout, nxout /) type_size = sizeof (sample) array_size = type_size * product (todouble (out_dims)) print (" Data type = " + typeof (sample) + " (" + type_size \ + " bytes)") print (" Requested dimensions = (" + ntimes + ", " + nyout + ", " \ + nxout + ")") print (" Requested memory size = " + sprintf ("%0.0f bytes", array_size) \ + sprintf (" (%0.2f Gb)", array_size / 1e9)) ; If there is to be a memory overflow error, it will probably happen here. xout = new (out_dims, typeof (sample), sample@_FillValue) ; create with all missing values ;----------------------------------------------------------- ; 6. Overlay input arrays. ;----------------------------------------------------------- print ("") print ("Overlay input arrays.") isub = isub(:,:) - imin ; convert all tile subscripts jsub = jsub(:,:) - jmin ; within master grid, to overlay ; subscripts within output grid do fi = 0, nfiles-1 f = addfile (infile2(fi), "r") ; open next input file i1 = isub(fi,0) ; recover i, j subscripts for i2 = isub(fi,1) ; current tile within output grid j1 = jsub(fi,0) j2 = jsub(fi,1) print (" " + infiles(fi) + " i1:i2, j1:j2 = " + i1 + ":" + i2 + ", " \ + j1 + ":" + j2) ; progress display, first pass only ; First tile: Overlay main array; copy all metadata for data var. if (fi .eq. 0) then xout(:,j1:j2,i1:i2) = f->$var$ ; All other tiles: Overlay main array; use mask to preserve existing data. else xout(:,j1:j2,i1:i2) = where (ismissing (xout(:,j1:j2,i1:i2)), \ f->$var$, xout(:,j1:j2,i1:i2)) ; function suppresses metadata copy end if end do ;----------------------------------------------------------- ; 7. Fix up metadata for output file. ;----------------------------------------------------------- print ("") print ("Fix up metadata for output file.") ; Prepare global attributes. f = addfile (infile2(0), "r") ; re-open first input file to copy ; misc. vars and metadata gatts = True ; copy most global attributes copy_VarAtts (f, gatts) ; indirect for NCL 6.0.0 workaround if (isatt (gatts, "tileid")) then ; remove tile ID attribute, delete (gatts@tileid) ; no longer singular end if time_stamp = systemfunc ("date") ; add history attribute new_history = time_stamp + ": Tiles joined by " + prog_id if (isatt (gatts, "history")) then ; append to previous history attribute gatts@history = new_history + newline + gatts@history else gatts@history = new_history end if ; Clean up the X and Y coordinate variables. ; These might have gaps if disjoint tiles are assembled. ; Therefore, make complete copies of X and Y from the supplemental file. xout&x = xmain(imin:imax) ; apply only to super array, because xout&y = ymain(jmin:jmax) ; this is where the gaps might be delete (xout&x@_FillValue) ; remove vestigial attributes delete (xout&y@_FillValue) ; Prepare supplemental variables. tiles!0 = "ntiles" ; include list of tiles as data variable delete (tiles@_FillValue) ; remove vestigial attribute ; Compute actual_range attributes. xout&x@actual_range = (/ min (xout&x), max (xout&x) /) ; coordinate vars xout&y@actual_range = (/ min (xout&y), max (xout&y) /) xout&time@actual_range = (/ min (xout&time), max (xout&time) /) xmin = min (xout) xmax = max (xout) xout@actual_range = (/ xmin, xmax /) ; main data array tiles@actual_range = (/ tile_min, tile_max /) ; supplemental array ; Here lat_main and lon_main are used only as carriers ; for these output attributes. lat_main@actual_range = (/ lat_min, lat_max /) ; 2-D coordinates lon_main@actual_range = (/ lon_min, lon_max /) print (" Min, max " + var + " data = " + xmin + ", " + xmax) ;----------------------------------------------------------- ; 8. Write output file. ;----------------------------------------------------------- print ("") print ("Write output file: " + outfile) if (isfilepresent (outfile)) then ; overwrite any previous file system ("rm " + outfile) end if setfileoption ("nc", "Format", "64BitOffset") ; use Netcdf large file format out = addfile (outfile, "c") ; create new output file setfileoption (out, "HeaderReserveSpace", header_space) ; optimize Netcdf-3 output; see NCL docs copy_VarAtts (gatts, out) ; write global attributes dummy_size = -1 ; make the time dimension unlimited opt_unlimited = True filedimdef (out, time_dim_name, dummy_size, opt_unlimited) ; Write variables in approximately the same order as in the input files. ; Some variables are copied directly because they are not affected ; by the join process. ; Time, x, y coordinate variables get copied automatically. out->lambert_conformal_conic = f->lambert_conformal_conic out->tiles_included = tiles out->lat = lat_main(jmin:jmax, imin:imax) ; write 2-D output coordinates out->lon = lon_main(jmin:jmax, imin:imax) out->yearday = f->yearday ; copy auxiliary time arrays out->time_bnds = f->time_bnds out->$var$ = xout ; write the main data array last, ; to improve speed print ("Done.") end exit