Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 172 additions & 6 deletions util/create_regional_remapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,25 +62,48 @@ class Grid:
filename = ""
lat_name = "" # Name for lat coordinate in file
lon_name = "" # Name for lon coordinate in file
grid_type = "" # Type of grid file
# Additional SCRIP variables
area = np.array([])
corner_lat = np.array([])
corner_lon = np.array([])
imask = np.array([])
dims = np.array([])
num_corners = 0

def __init__(self, filename_, grid_filetype_):
    """Open the grid file and record its size and coordinate variable names.

    Parameters
    ----------
    filename_ : str
        Path to the grid description netCDF file.
    grid_filetype_ : str
        Type of grid file. "scrip" selects SCRIP naming conventions
        (grid_size / grid_center_lat / grid_center_lon); any other value
        is treated as an unstructured grid with an "ncol" dimension.
    """
    self.filename = filename_
    self.grid_type = grid_filetype_
    # Context manager ensures the file handle is closed even if a
    # dimension lookup raises (the original leaked the handle on error).
    with netCDF4.Dataset(self.filename, "r") as f:
        if grid_filetype_ == "scrip":
            self.size = f.dimensions["grid_size"].size
            self.lat_name = "grid_center_lat"
            self.lon_name = "grid_center_lon"
            # grid_corners is optional in some SCRIP-like files.
            if "grid_corners" in f.dimensions:
                self.num_corners = f.dimensions["grid_corners"].size
        else:
            self.size = f.dimensions["ncol"].size
            self.lat_name = "lat"
            self.lon_name = "lon"

def grab_chunk(self,chunk):
def grab_chunk(self, chunk, load_full_scrip=False):
    """Read the next `chunk` grid columns of center coordinates.

    Advances self.chunk_idx past the columns read. When load_full_scrip
    is True and the grid is SCRIP format, also reads the SCRIP metadata
    (grid_area, and when present grid_corner_lat/lon, grid_imask, and —
    on the first chunk only — grid_dims).

    Parameters
    ----------
    chunk : int
        Maximum number of columns to read in this call.
    load_full_scrip : bool, optional
        Also load SCRIP area/corner/mask variables (default False).

    Raises
    ------
    KeyError
        If load_full_scrip is requested but the mandatory SCRIP variable
        'grid_area' is absent from the file.
    """
    with netCDF4.Dataset(self.filename, "r") as f:
        chunk_end = np.min([self.size, self.chunk_idx + chunk])
        self.lat = f[self.lat_name][self.chunk_idx:chunk_end].data
        self.lon = f[self.lon_name][self.chunk_idx:chunk_end].data

        # Load additional SCRIP variables if requested
        if load_full_scrip and self.grid_type == "scrip":
            # grid_area is mandatory in SCRIP format; fail with an
            # informative message rather than a bare KeyError.
            if "grid_area" in f.variables:
                self.area = f["grid_area"][self.chunk_idx:chunk_end].data
            else:
                raise KeyError(
                    "Required SCRIP variable 'grid_area' not found in file '{}'. "
                    "Set load_full_scrip=False or provide a valid SCRIP grid file "
                    "containing 'grid_area'.".format(self.filename)
                )
            # Corner coordinates and the integer mask are optional.
            if "grid_corner_lat" in f.variables:
                self.corner_lat = f["grid_corner_lat"][self.chunk_idx:chunk_end, :].data
                self.corner_lon = f["grid_corner_lon"][self.chunk_idx:chunk_end, :].data
            if "grid_imask" in f.variables:
                self.imask = f["grid_imask"][self.chunk_idx:chunk_end].data
            # grid_dims describes the whole grid; read it only once.
            if self.chunk_idx == 0 and "grid_dims" in f.variables:
                self.dims = f["grid_dims"][:].data

        self.chunk_idx = chunk_end

Expand All @@ -97,6 +120,20 @@ class Site:
offset = 0
length = 0
my_id = -1
# Additional SCRIP grid data for regional output (separate for lon and lat filtering)
lon_areas = np.array([])
lon_imasks = np.array([])
lon_corner_lats = np.array([])
lon_corner_lons = np.array([])
lat_areas = np.array([])
lat_imasks = np.array([])
lat_corner_lats = np.array([])
lat_corner_lons = np.array([])
# Final filtered SCRIP data
areas = np.array([])
corner_lats = np.array([])
corner_lons = np.array([])
imasks = np.array([])

def __init__(self,name_,bnds,bnds_type):
self.name = name_
Expand All @@ -110,12 +147,30 @@ def __init__(self,name_,bnds,bnds_type):
self.lat_bnds = slat + np.array([-1,1])*srad
self.lon_bnds = slon + np.array([-1,1])*srad

def filter_gids(self):
mask_lon = np.in1d(self.lon_gids,self.lat_gids)
mask_lat = np.in1d(self.lat_gids,self.lon_gids)
def filter_gids(self,filter_scrip=False):
mask_lon = np.isin(self.lon_gids,self.lat_gids)
mask_lat = np.isin(self.lat_gids,self.lon_gids)
self.gids = self.lon_gids[mask_lon]
self.lons = self.lons[mask_lon]
self.lats = self.lats[mask_lat]
if filter_scrip:
self.areas = self.lon_areas[mask_lon]
self.imasks = self.lon_imasks[mask_lon]
if len(self.lon_corner_lats) > 0:
self.corner_lats = self.lon_corner_lats[mask_lon,:]
self.corner_lons = self.lon_corner_lons[mask_lon,:]

# Sort by global IDs to preserve original grid order
sort_idx = np.argsort(self.gids)
self.gids = self.gids[sort_idx]
self.lons = self.lons[sort_idx]
self.lats = self.lats[sort_idx]
if filter_scrip:
self.areas = self.areas[sort_idx]
self.imasks = self.imasks[sort_idx]
if len(self.corner_lats) > 0:
self.corner_lats = self.corner_lats[sort_idx,:]
self.corner_lons = self.corner_lons[sort_idx,:]

class Sites:
m_sites = []
Expand Down Expand Up @@ -186,6 +241,82 @@ def construct_remap(casename,sites,grid):
csvwriter = csv.writer(csvfile)
for s in sites:
csvwriter.writerow([s.my_id,s.name,str(s.offset+1),str(s.offset+s.length)])

def construct_regional_grid(casename, sites, grid):
    """Create a SCRIP format grid file containing only the regional subset.

    Concatenates the filtered cells of every site into a single SCRIP
    grid and writes it to "<casename>_regional_grid.nc". Sites must have
    been filtered with filter_scrip=True so areas/imasks (and corners,
    when available) are populated.

    Parameters
    ----------
    casename : str
        Prefix for the output file name.
    sites : list
        Site objects with populated gids/lats/lons and SCRIP data.
    grid : Grid
        Source grid (currently unused; kept for interface symmetry with
        construct_remap).

    Raises
    ------
    ValueError
        If no sites are provided.
    """
    if not sites:
        raise ValueError("No sites provided; cannot construct a regional grid.")

    # Combine all sites into a single regional grid
    total_size = sum(s.length for s in sites)
    # An empty grid file would be useless and likely signals bad site
    # definitions; warn and skip instead of writing it.
    if total_size == 0:
        print("Warning: no grid cells matched any site; "
              "regional grid file '%s_regional_grid.nc' will not be created." % casename)
        return

    # Allocate arrays
    lats = np.zeros(total_size)
    lons = np.zeros(total_size)
    areas = np.zeros(total_size)
    imasks = np.zeros(total_size, dtype=np.int32)
    has_corners = any(len(s.corner_lats) > 0 for s in sites)
    # Take the corner count from the first site that actually has corner
    # data (sites[0] may have matched no cells).
    num_corners = next((s.corner_lats.shape[1] for s in sites
                        if len(s.corner_lats) > 0), 0) if has_corners else 0
    corner_lats = np.zeros((total_size, num_corners)) if has_corners else None
    corner_lons = np.zeros((total_size, num_corners)) if has_corners else None

    # Fill arrays from sites
    offset = 0
    for s in sites:
        n = s.length
        lats[offset:offset+n] = s.lats
        lons[offset:offset+n] = s.lons
        areas[offset:offset+n] = s.areas
        imasks[offset:offset+n] = s.imasks
        # Guard per-site: a site with no matched cells has an empty
        # corner array that cannot be assigned into a 2-D slice.
        if has_corners and len(s.corner_lats) > 0:
            corner_lats[offset:offset+n, :] = s.corner_lats
            corner_lons[offset:offset+n, :] = s.corner_lons
        offset += n

    # Create regional grid netCDF file in SCRIP format
    f = netCDF4.Dataset(casename+"_regional_grid.nc", "w", format="NETCDF3_CLASSIC")
    f.createDimension('grid_size', total_size)
    if has_corners:
        f.createDimension('grid_corners', num_corners)
    f.createDimension('grid_rank', 1)

    # Create variables
    v_area = f.createVariable('grid_area', np.float64, ('grid_size',))
    v_area.units = "radians^2"

    v_clat = f.createVariable('grid_center_lat', np.float64, ('grid_size',), fill_value=9.96920996838687e+36)
    v_clat.units = "degrees"

    v_clon = f.createVariable('grid_center_lon', np.float64, ('grid_size',), fill_value=9.96920996838687e+36)
    v_clon.units = "degrees"

    if has_corners:
        v_cornlat = f.createVariable('grid_corner_lat', np.float64, ('grid_size', 'grid_corners'), fill_value=9.96920996838687e+36)
        v_cornlat.units = "degrees"

        v_cornlon = f.createVariable('grid_corner_lon', np.float64, ('grid_size', 'grid_corners'), fill_value=9.96920996838687e+36)
        v_cornlon.units = "degrees"

    v_imask = f.createVariable('grid_imask', np.int32, ('grid_size',))

    v_dims = f.createVariable('grid_dims', np.int32, ('grid_rank',))

    # Write data
    v_area[:] = areas
    v_clat[:] = lats
    v_clon[:] = lons
    if has_corners:
        v_cornlat[:, :] = corner_lats
        v_cornlon[:, :] = corner_lons
    v_imask[:] = imasks
    v_dims[:] = [total_size]

    # Add global attributes (mirrors the SCRIP files this tool consumes)
    f.api_version = 5.0
    f.version = 5.0
    f.floating_point_word_size = 8
    f.file_size = 0

    f.close()
    print(f"Regional grid file created: {casename}_regional_grid.nc")
    print(f"  Total grid cells: {total_size}")


def main():
Expand All @@ -203,21 +334,24 @@ def main():
help='Case name for regional remapping, this will used for the file names of output.')
p.add_argument('chunksize', type=int, default=48602,
help='Option to read grid data in chunks, useful for large simulation grids')
p.add_argument('--output-regional-grid', action='store_true',
help='Output a SCRIP format grid file containing only the regional subset')
Comment on lines +337 to +338
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing documentation: The file header documentation (lines 32-49) describes the outputs of this script but doesn't mention the new optional regional grid output file that can be generated with the --output-regional-grid flag. Consider adding a third output to the documentation describing the regional_grid.nc file and its format.

Copilot uses AI. Check for mistakes.
m_args = p.parse_args();

# Load the data from args
src_grid = Grid(m_args.grid_file,m_args.grid_filetype)
remap_sites = Sites(m_args.site_info)

# Determine which gids in source grid match each site
load_scrip = m_args.output_regional_grid and src_grid.grid_type == "scrip"
print("Finding global dof's that could fit site dimensions...")
while(src_grid.chunk_idx < src_grid.size):
chnk_start = src_grid.chunk_idx+1
chnk_end = np.amin([src_grid.chunk_idx+m_args.chunksize,src_grid.size])
chnk_perc = np.round(chnk_end/src_grid.size*100,decimals=2)
print(" Searching cols (%4.2f%%): %12d - %12d, out of %12d total" %(chnk_perc,chnk_start,chnk_end,src_grid.size))
chunk_idx = src_grid.chunk_idx
src_grid.grab_chunk(m_args.chunksize)
src_grid.grab_chunk(m_args.chunksize, load_full_scrip=load_scrip)
for idx, isite in enumerate(remap_sites.m_sites):
isite.lon_bnds[0]=isite.lon_bnds[0] % 360
isite.lon_bnds[1]=isite.lon_bnds[1] % 360
Expand All @@ -229,17 +363,49 @@ def main():
(src_grid.lon <= isite.lon_bnds[1]))
isite.lons = np.append(isite.lons, src_grid.lon[lids[0]])
isite.lon_gids = np.append(isite.lon_gids, lids[0]+chunk_idx)

# Store SCRIP data for lon matches
if load_scrip:
isite.lon_areas = np.append(isite.lon_areas, src_grid.area[lids[0]])
isite.lon_imasks = np.append(isite.lon_imasks, src_grid.imask[lids[0]])
if len(src_grid.corner_lat) > 0:
if len(isite.lon_corner_lats) == 0:
isite.lon_corner_lats = src_grid.corner_lat[lids[0],:]
isite.lon_corner_lons = src_grid.corner_lon[lids[0],:]
else:
isite.lon_corner_lats = np.vstack([isite.lon_corner_lats, src_grid.corner_lat[lids[0],:]])
isite.lon_corner_lons = np.vstack([isite.lon_corner_lons, src_grid.corner_lon[lids[0],:]])
Comment on lines +376 to +377
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance issue: Using np.vstack in a loop (lines 376, 377, 393, 394) repeatedly reallocates and copies arrays, which is inefficient for large datasets. Since you're already using chunked reading for the main grid, consider either: (1) using a list and converting to array at the end with np.vstack(list) once, or (2) pre-allocating arrays and using index-based assignment, similar to how lons and lon_gids are handled with np.append.

Copilot uses AI. Check for mistakes.

lids = np.where( (src_grid.lat >= isite.lat_bnds[0]) &
(src_grid.lat <= isite.lat_bnds[1]))
isite.lats = np.append(isite.lats, src_grid.lat[lids[0]])
isite.lat_gids = np.append(isite.lat_gids, lids[0]+chunk_idx)

# Store SCRIP data for lat matches
if load_scrip:
isite.lat_areas = np.append(isite.lat_areas, src_grid.area[lids[0]])
isite.lat_imasks = np.append(isite.lat_imasks, src_grid.imask[lids[0]])
Comment on lines +370 to +387
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type safety concern: lon_imasks and lat_imasks are accumulated using np.append without explicit dtype, but the final imasks is expected to be int32 (line 254). If the source grid_imask has a different integer type, this could lead to type inconsistencies. Consider explicitly casting when appending: isite.lon_imasks = np.append(isite.lon_imasks, src_grid.imask[lids[0]]).astype(np.int32) or initializing the arrays with the correct dtype in the Site class.

Copilot uses AI. Check for mistakes.
if len(src_grid.corner_lat) > 0:
if len(isite.lat_corner_lats) == 0:
isite.lat_corner_lats = src_grid.corner_lat[lids[0],:]
isite.lat_corner_lons = src_grid.corner_lon[lids[0],:]
else:
isite.lat_corner_lats = np.vstack([isite.lat_corner_lats, src_grid.corner_lat[lids[0],:]])
isite.lat_corner_lons = np.vstack([isite.lat_corner_lons, src_grid.corner_lon[lids[0],:]])
Comment on lines +393 to +394
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance issue: Using np.vstack in a loop repeatedly reallocates and copies arrays, which is inefficient for large datasets. Since you're already using chunked reading for the main grid, consider either: (1) using a list and converting to array at the end with np.vstack(list) once, or (2) pre-allocating arrays and using index-based assignment, similar to how lats and lat_gids are handled with np.append.

Copilot uses AI. Check for mistakes.

# Consolidate the gids for each site
for idx, isite in enumerate(remap_sites.m_sites):
print("Setting gids for site: %s\n with bounds [%f,%f] x [%f,%f]" %(isite.name,isite.lon_bnds[0],isite.lon_bnds[1],isite.lat_bnds[0],isite.lat_bnds[1]))
isite.filter_gids()
isite.filter_gids(filter_scrip=load_scrip)
# Construct output files
construct_remap(m_args.casename,remap_sites.m_sites,src_grid)

# Construct regional grid file if requested
if m_args.output_regional_grid:
if src_grid.grid_type == "scrip":
construct_regional_grid(m_args.casename,remap_sites.m_sites,src_grid)
else:
print("Warning: --output-regional-grid only supported for SCRIP format grid files. Skipping.")

if __name__ == '__main__':
main()
Loading