Skip to content

Commit

Permalink
in progress ... commit for back up the latest
Browse files Browse the repository at this point in the history
  • Loading branch information
lee1043 committed Sep 25, 2024
1 parent 4fe0088 commit 63eb9d6
Show file tree
Hide file tree
Showing 3 changed files with 319 additions and 387 deletions.
90 changes: 63 additions & 27 deletions pcmdi_metrics/mean_climate/lib/calculate_climatology.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import os

from pcmdi_metrics.io import select_subset, xcdat_open
from pcmdi_metrics.io import get_time, select_subset, xcdat_open
from pcmdi_metrics.utils import (
check_monthly_time_axis,
check_time_bounds_exist,
Expand All @@ -26,6 +26,7 @@ def calculate_climatology(
periodinname: bool = None,
climlist: list = None,
repair_time_axis: bool = False,
overwrite_output: bool = True,
):
"""
Calculate climatology from a dataset over a specified period.
Expand Down Expand Up @@ -60,6 +61,8 @@ def calculate_climatology(
List of climatologies to calculate and save (e.g., ["AC", "DJF", "MAM", "JJA", "SON"], default is all).
repair_time_axis : bool, optional
If True, regenerate time axis if data has incorrect time axis, default is False
overwrite_output: bool, optional
If True, output file will be overwritten regardless of its existance
Returns
-------
Expand Down Expand Up @@ -115,13 +118,20 @@ def calculate_climatology(
ds = ds.bounds.add_missing_bounds(["T"])
print("Generated time bounds")

if len(get_time(ds)) == 12:
input_is_annual_cycle = True
dec_mode = "JFD"
else:
input_is_annual_cycle = False
dec_mode = "DJF"

# Determine the output directory, using outpath if provided, else use the directory of outfile
outdir = outpath or os.path.dirname(outfile)
os.makedirs(outdir, exist_ok=True) # Create the directory if it doesn't exist
print("outdir:", outdir)

# Define the climatology period based on the provided start and end dates, or use the entire time series
if start is not None and end is not None:
if start is not None and end is not None and not input_is_annual_cycle:
# If a period is specified by the user, parse the start and end dates
start_yr, start_mo = map(int, start.split("-")[:2])
start_da = 1 # Default to the first day of the start month
Expand Down Expand Up @@ -162,33 +172,21 @@ def calculate_climatology(
print(f"start: {start_yr:04d}-{start_mo:02d}-{start_da:02d}")
print(f"end: {end_yr:04d}-{end_mo:02d}-{end_da:02d}")

# Configure Dask to handle large chunks and calculate climatology
# dask.config.set(**{"array.slicing.split_large_chunks": True})

# Compute seasonal climatology (weighted)
ds_clim = ds.temporal.climatology(
var,
freq="season",
weighted=True,
season_config={"dec_mode": "DJF", "drop_incomplete_djf": True},
)
# Compute monthly climatology (weighted)
ds_ac = ds.temporal.climatology(var, freq="month", weighted=True)

# Organize the computed climatologies into a dictionary
ds_clim_dict = {
season: ds_clim.isel(time=i)
for i, season in enumerate(["DJF", "MAM", "JJA", "SON"])
}
ds_clim_dict["AC"] = ds_ac # Add the annual cycle climatology

# Determine which climatologies to process, defaulting to all if none are specified
clims = climlist or ["AC", "DJF", "MAM", "JJA", "SON"]
seasons = climlist or ["AC", "DJF", "MAM", "JJA", "SON"]

# Find the first element except "AC"
first_season = next(item for item in seasons if item != "AC")

# Create a dictionary as pointer to climatology fields
ds_clim_dict = dict()

season_index_dict = {"DJF": 0, "MAM": 1, "JJA": 2, "SON": 3}

print("outdir, outfilename, outfile:", outdir, outfilename, outfile)

# Iterate over the selected climatologies and save each to a NetCDF file
for s in clims:
for s in seasons:
if outfilename_default_template:
# Define the output filename suffix based on the climatology and period (if specified)
addf = (
Expand Down Expand Up @@ -216,9 +214,47 @@ def calculate_climatology(
).replace("%(season)", s),
)

print(f"output file for {s} is {outpath_season}")
# Save the climatology to the output NetCDF file, including global attributes
ds_clim_dict[s].to_netcdf(outpath_season)
if not os.path.isfile(outpath_season) or overwrite_output:
# Handle the "AC" (Annual Cycle) case
if s == "AC":
ds_ac = (
ds
if input_is_annual_cycle
else ds.temporal.climatology(var, freq="month", weighted=True)
)

# Add the annual cycle climatology to the dictionary
ds_clim_dict["AC"] = ds_ac

# Handle the first season and subsequent seasons
else:
# (Optional) Configure Dask for large chunk processing if necessary
# dask.config.set(**{"array.slicing.split_large_chunks": True}) # Uncomment if needed

# Compute seasonal climatology (weighted) with specific settings for DJF
if s == first_season:
ds_clim = ds.temporal.climatology(
var,
freq="season",
weighted=True,
season_config={
"dec_mode": dec_mode,
"drop_incomplete_djf": True,
},
)

# Add computed climatology to the dictionary
ds_clim_dict[s] = ds_clim.isel(time=season_index_dict[s])

# Save the climatology file unless it's an annual cycle input and "AC"
if s != "AC" or not input_is_annual_cycle:
# Save climatology to the output NetCDF file, including global attributes
ds_clim_dict[s].to_netcdf(outpath_season)
print(
f"Successfully saved climatology for season '{s}' to {outpath_season}"
)
else:
print("Skipping 'AC' as input is already an annual cycle.")

ds.close()

Expand Down
104 changes: 62 additions & 42 deletions pcmdi_metrics/mean_climate/lib_unified/lib_unified.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,55 @@ def extract_info_from_model_catalogue(
return variables, models, runs_dict


def get_annual_cycle(
var,
data_path,
out_path,
start="1981-01",
end="2005-12",
repair_time_axis=True,
overwrite_output_ac=True,
):
ver = datetime.datetime.now().strftime("v%Y%m%d")

out_path_ver = os.path.join(out_path, ver)
os.makedirs(out_path_ver, exist_ok=True)

outfilename_head = (
f"{replace_date_pattern(str(os.path.basename(data_path)), '')}".replace(
"_.nc", ""
)
.replace("_*", "")
.replace(".nc", "")
.replace("*", "")
)
outfilename_template = (
f"{outfilename_head}_%(start-yyyymm)-%(end-yyyymm)_%(season).nc"
)

print("get_annual_cycle, var:", var)
print("data_path:", data_path)
print("out_path:", out_path)
print("outfilename_head:", outfilename_head)
print("outfilename_template:", outfilename_template)

d_clim_dict = calculate_climatology(
var,
infile=data_path,
outpath=out_path_ver,
outfilename=outfilename_template,
outfilename_default_template=False,
start=start,
end=end,
ver="",
periodinname=False,
repair_time_axis=repair_time_axis,
overwrite_output=overwrite_output_ac,
)

return d_clim_dict


def generate_model_data_path(model_data_path_template, var, model, run):
return (
model_data_path_template.replace("%(var)", var)
Expand Down Expand Up @@ -216,47 +265,6 @@ def get_unique_bases(variables):
return result


def get_annual_cycle(var, data_path, out_path):
start = "1981-01"
end = "2005-12"
ver = datetime.datetime.now().strftime("v%Y%m%d")

repair_time_axis = True

out_path_ver = os.path.join(out_path, ver)
os.makedirs(out_path_ver, exist_ok=True)

outfilename_head = (
f"{replace_date_pattern(str(os.path.basename(data_path)), '')}".replace(
"_.nc", ""
).replace("_*", "")
)
outfilename_template = (
f"{outfilename_head}_%(start-yyyymm)-%(end-yyyymm)_%(season).nc"
)

print("get_annual_cycle, var:", var)
print("data_path:", data_path)
print("out_path:", out_path)
print("outfilename_head:", outfilename_head)
print("outfilename_template:", outfilename_template)

d_clim_dict = calculate_climatology(
var,
infile=data_path,
outpath=out_path_ver,
outfilename=outfilename_template,
outfilename_default_template=False,
start=start,
end=end,
ver="",
periodinname=False,
repair_time_axis=repair_time_axis,
)

return d_clim_dict


def calc_metrics(ac_ref, ac_run, in_progress=True):
if in_progress:
metrics = None
Expand Down Expand Up @@ -293,6 +301,10 @@ def process_dataset(
common_grid,
interim_output_path_dict_data,
data_type="ref",
start="1981-01",
end="2005-12",
repair_time_axis=False,
overwrite_output_ac=True,
):
# Sanity checks
if data_type not in ["ref", "model"]:
Expand Down Expand Up @@ -338,7 +350,15 @@ def process_dataset(

# Calculate the annual cycle and save annual cycle
if var not in rad_diagnostic_variables:
ac = get_annual_cycle(varname, data_path, out_path)
ac = get_annual_cycle(
varname,
data_path,
out_path,
start=start,
end=end,
repair_time_axis=repair_time_axis,
overwrite_output_ac=overwrite_output_ac,
)
else:
ac = derive_rad_var(
var,
Expand Down
Loading

0 comments on commit 63eb9d6

Please sign in to comment.