Skip to content

Commit

Permalink
Refactor leaderboard code
Browse files Browse the repository at this point in the history
This commit refactors the leaderboard code using the new features from
ClimaAnalysis. This commit also deletes the old leaderboard code and the
tests for it. Also, there is an off by one month issue when handling the
dates and seasons. This is fixed in this commit.

The commit also moves the code to leaderboard.jl and a line is added to
the pipeline.

One significant difference is that the leaderboard now plots the best
and worst single models using only the annual error, rather than the
error averaged over annual and seasonal data.
  • Loading branch information
ph-kev committed Oct 4, 2024
1 parent 279b95b commit fbd9a14
Show file tree
Hide file tree
Showing 12 changed files with 233 additions and 1,054 deletions.
4 changes: 3 additions & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,9 @@ steps:

- label: "GPU AMIP target: topography and diagnostic EDMF"
key: "gpu_amip_target_topo_diagedmf_shortrun"
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_target_topo_diagedmf_shortrun.yml --job_id gpu_amip_target_topo_diagedmf_shortrun"
command:
- "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_target_topo_diagedmf_shortrun.yml --job_id gpu_amip_target_topo_diagedmf_shortrun"
# leaderboard.jl reads its arguments as <folder to save the plots> <folder with the simulation output>
- "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/leaderboard.jl experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun_artifacts experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun/clima_atmos/output_active"
artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun_artifacts/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand Down
2 changes: 1 addition & 1 deletion experiments/ClimaEarth/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
[compat]
ArgParse = "1.1"
ArtifactWrappers = "0.2"
ClimaAnalysis = "0.5.4"
ClimaAnalysis = "0.5.10"
ClimaAtmos = "0.27"
ClimaCorePlots = "0.2"
ClimaDiagnostics = "0.2"
Expand Down
228 changes: 228 additions & 0 deletions experiments/ClimaEarth/leaderboard.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import ClimaAnalysis
import ClimaUtilities.ClimaArtifacts: @clima_artifact
import GeoMakie
import CairoMakie
import Dates

@info "Error against observations"

# Vector of (simulation short name, observational variable name) tuples used to
# load the simulation and observational data for every variable except "pr"
sim_obs_short_names_no_pr = let
    sim_to_obs = (
        "rsdt" => "solar_mon",
        "rsut" => "toa_sw_all_mon",
        "rlut" => "toa_lw_all_mon",
        "rsutcs" => "toa_sw_clr_t_mon",
        "rlutcs" => "toa_lw_clr_t_mon",
        "rsds" => "sfc_sw_down_all_mon",
        "rsus" => "sfc_sw_up_all_mon",
        "rlds" => "sfc_lw_down_all_mon",
        "rlus" => "sfc_lw_up_all_mon",
        "rsdscs" => "sfc_sw_down_clr_t_mon",
        "rsuscs" => "sfc_sw_up_clr_t_mon",
        "rldscs" => "sfc_lw_down_clr_t_mon",
    )
    [(sim_name, obs_name) for (sim_name, obs_name) in sim_to_obs]
end

# Colormap extrema used for the bias plots, keyed by short name. Every range is
# symmetric about zero, so only the limit `lim` of `(-lim, lim)` is listed.
compare_vars_biases_plot_extrema = Dict(
    short_name => (-lim, lim) for (short_name, lim) in (
        ("pr", 5.0),
        ("rsdt", 2.0),
        ("rsut", 50.0),
        ("rlut", 50.0),
        ("rsutcs", 20.0),
        ("rlutcs", 20.0),
        ("rsds", 50.0),
        ("rsus", 10.0),
        ("rlds", 50.0),
        ("rlus", 50.0),
        ("rsdscs", 10.0),
        ("rsuscs", 10.0),
        ("rldscs", 20.0),
    )
)


# The script takes two positional arguments, read in this order:
#   1. the folder where the bias and leaderboard plots are saved
#   2. the folder containing the NetCDF files of the simulation
if length(ARGS) < 2
    error(
        "Usage: leaderboard.jl <path of folder to save the leaderboard and bias plots> " *
        "<path of folder containing NetCDF files>",
    )
end

# Path to saved leaderboards
leaderboard_base_path = ARGS[begin]

# Path to simulation data
diagnostics_folder_path = ARGS[begin + 1]

# Loaders for the simulation data, keyed by short name. Every entry is a
# zero-argument function that reads the variable from `diagnostics_folder_path`
# and applies `ClimaAnalysis.shift_to_start_of_previous_month` to it.
sim_var_dict = Dict{String, Any}()

# "pr" is additionally converted to "mm/day" by multiplying the data by -86400
# (NOTE(review): the sign flip presumably makes precipitation positive - confirm)
sim_var_dict["pr"] = function ()
    simdir = ClimaAnalysis.SimDir(diagnostics_folder_path)
    pr = get(simdir, short_name = "pr")
    pr = ClimaAnalysis.convert_units(pr, "mm/day", conversion_function = x -> x .* Float32(-86400))
    return ClimaAnalysis.shift_to_start_of_previous_month(pr)
end

# The rest of the simulation data is loaded without any unit conversion
for (short_name, _) in sim_obs_short_names_no_pr
    sim_var_dict[short_name] = function ()
        simdir = ClimaAnalysis.SimDir(diagnostics_folder_path)
        loaded_var = get(simdir, short_name = short_name)
        return ClimaAnalysis.shift_to_start_of_previous_month(loaded_var)
    end
end

# Loaders for the observational data, keyed by the simulation short name. Every
# entry is a function of `start_date`, which is passed as `new_start_date` when
# the observational `OutputVar` is constructed.
obs_var_dict = Dict{String, Any}()

obs_var_dict["pr"] = function (start_date)
    gpcp_file = joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc")
    return ClimaAnalysis.OutputVar(
        gpcp_file,
        "precip",
        new_start_date = start_date,
        shift_by = Dates.firstdayofmonth,
    )
end

# The rest of the observational data comes from a single radiation file
for (sim_name, obs_name) in sim_obs_short_names_no_pr
    obs_var_dict[sim_name] = function (start_date)
        ceres_file = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc")
        obs_var = ClimaAnalysis.OutputVar(
            ceres_file,
            obs_name,
            new_start_date = start_date,
            shift_by = Dates.firstdayofmonth,
        )
        # Only "W m-2" is expected here; it is relabelled as "W m^-2"
        if ClimaAnalysis.units(obs_var) != "W m-2"
            error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units")
        end
        return ClimaAnalysis.set_units(obs_var, "W m^-2")
    end
end

# Storage for the processed data: short name => season => (simulation, observation)
sim_obs_comparsion_dict = Dict()
# "ANN" labels the data that is not split by season
seasons = ["ANN", "MAM", "JJA", "SON", "DJF"]

# Log the dates of the simulation output for debugging. "pr" is used here, but
# it shouldn't matter what short name we use.
pr_var = sim_var_dict["pr"]()
output_dates = let first_date = Dates.DateTime(pr_var.attributes["start_date"])
    map(t -> first_date + Dates.Second(t), ClimaAnalysis.times(pr_var))
end
@info "Working with dates:"
@info output_dates

for short_name in keys(sim_var_dict)
    # Load the simulation data, then the observational data using the start
    # date of the simulation
    sim_var = sim_var_dict[short_name]()
    obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"])

    # Remove the first spin_up_months (counted as 31 days each) from the
    # simulation, but only when the last time reaches the cutoff
    spin_up_months = 6
    spinup_cutoff = spin_up_months * 31 * 86400.0
    if ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff
        sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)
    end

    # Resample the observations as the (windowed) simulation data
    obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var)

    # Time averages of the whole data followed by each season, in the same
    # order as `seasons`
    obs_averages = map(ClimaAnalysis.average_time, [obs_var, ClimaAnalysis.split_by_season(obs_var)...])
    sim_averages = map(ClimaAnalysis.average_time, [sim_var, ClimaAnalysis.split_by_season(sim_var)...])

    # Prefix the short name with "mean " for plotting the title
    for averaged in sim_averages
        averaged.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(averaged))"
    end

    # Save the (simulation, observation) pair of every season
    sim_obs_comparsion_dict[short_name] =
        Dict(label => (sim_avg, obs_avg) for (label, sim_avg, obs_avg) in zip(seasons, sim_averages, obs_averages))
end

# Each group of short names is drawn on its own figure, one row per variable
compare_vars_biases_groups = [
    ["pr", "rsdt", "rsut", "rlut"],
    ["rsds", "rsus", "rlds", "rlus"],
    ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"],
]

# Plot bias plots: one figure per season and group
for season in seasons
    # Files for "ANN" are named with "total" instead of the season
    file_suffix = season == "ANN" ? "total" : season
    for compare_vars_biases in compare_vars_biases_groups
        fig_bias = CairoMakie.Figure(; size = (600, 300 * length(compare_vars_biases)))
        for (row, short_name) in enumerate(compare_vars_biases)
            sim_avg, obs_avg = sim_obs_comparsion_dict[short_name][season]
            ClimaAnalysis.Visualize.plot_bias_on_globe!(
                fig_bias,
                sim_avg,
                obs_avg,
                cmap_extrema = compare_vars_biases_plot_extrema[short_name],
                p_loc = (row, 1),
            )
        end
        # The file is named after the first short name of the group
        bias_file = "bias_$(first(compare_vars_biases))_$(file_suffix).png"
        CairoMakie.save(joinpath(leaderboard_base_path, bias_file), fig_bias)
    end
end

# Plot leaderboard
# Read the RMSEs from the "cmip_model_rmse" artifact into RMSEVariables, then
# add the CliMA model and its units to each of them
rmse_var_pr, rmse_var_rsut, rmse_var_rlut = map((
    ("pr", "pr_rmse_amip_pr_amip_5yr.csv", "mm / day"),
    ("rsut", "rsut_rmse_amip_rsut_amip_5yr.csv", "W m^-2"),
    ("rlut", "rlut_rmse_amip_rlut_amip_5yr.csv", "W m^-2"),
)) do (short_name, rmse_file, units)
    loaded = ClimaAnalysis.read_rmses(joinpath(@clima_artifact("cmip_model_rmse"), rmse_file), short_name, units = units)
    loaded = ClimaAnalysis.add_model(loaded, "CliMA")
    ClimaAnalysis.add_unit!(loaded, "CliMA", units)
    return loaded
end

# Add RMSE for the CliMA model and for each season
for season in seasons, (short_name, rmse_var) in (("pr", rmse_var_pr), ("rsut", rmse_var_rsut), ("rlut", rmse_var_rlut))
    sim_avg, obs_avg = sim_obs_comparsion_dict[short_name][season]
    rmse_var["CliMA", season] = ClimaAnalysis.global_rmse(sim_avg, obs_avg)
end

# Plot box plots: one row per RMSEVariable
rmse_vars = (rmse_var_pr, rmse_var_rsut, rmse_var_rlut)
# NOTE(review): the height is presumably 300 per box plot row plus 400 for the
# leaderboard row - confirm
fig_leaderboard = CairoMakie.Figure(; fontsize = 20, size = (800, 300 * length(rmse_vars) + 400))
foreach(enumerate(rmse_vars)) do (row, rmse_var)
    ClimaAnalysis.Visualize.plot_boxplot!(
        fig_leaderboard,
        rmse_var,
        ploc = (row, 1),
        best_and_worst_category_name = "ANN",
    )
end

# Plot leaderboard in the row below the box plots and save the figure
ClimaAnalysis.Visualize.plot_leaderboard!(
    fig_leaderboard,
    rmse_vars...,
    best_category_name = "ANN",
    ploc = (length(rmse_vars) + 1, 1),
)
leaderboard_file = joinpath(leaderboard_base_path, "bias_leaderboard.png")
CairoMakie.save(leaderboard_file, fig_leaderboard)
108 changes: 0 additions & 108 deletions experiments/ClimaEarth/run_amip.jl
Original file line number Diff line number Diff line change
Expand Up @@ -920,114 +920,6 @@ if ClimaComms.iamroot(comms_ctx)
files_root = ".monthly",
output_dir = dir_paths.artifacts,
)

## Compare against observations
if t_end > 84600 && config_dict["output_default_diagnostics"]
@info "Error against observations"
include("user_io/leaderboard.jl")
ClimaAnalysis = Leaderboard.ClimaAnalysis

compare_vars_biases_plot_extrema = Dict(
"pr" => (-5.0, 5.0),
"rsdt" => (-2.0, 2.0),
"rsut" => (-50.0, 50.0),
"rlut" => (-50.0, 50.0),
"rsutcs" => (-20.0, 20.0),
"rlutcs" => (-20.0, 20.0),
"rsds" => (-50.0, 50.0),
"rsus" => (-10.0, 10.0),
"rlds" => (-50.0, 50.0),
"rlus" => (-50.0, 50.0),
"rsdscs" => (-10.0, 10.0),
"rsuscs" => (-10.0, 10.0),
"rldscs" => (-20.0, 20.0),
)

diagnostics_folder_path = atmos_sim.integrator.p.output_dir
leaderboard_base_path = dir_paths.artifacts

compare_vars_biases_groups = [
["pr", "rsdt", "rsut", "rlut"],
["rsds", "rsus", "rlds", "rlus"],
["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"],
]

function compute_biases(compare_vars_biases, dates)
if isempty(dates)
return map(x -> 0.0, compare_vars_biases)
else
return Leaderboard.compute_biases(
diagnostics_folder_path,
compare_vars_biases,
dates,
cmap_extrema = compare_vars_biases_plot_extrema,
)
end
end

function plot_biases(dates, biases, output_name)
isempty(dates) && return nothing

output_path = joinpath(leaderboard_base_path, "bias_$(output_name).png")
Leaderboard.plot_biases(biases; output_path)
end

first_var = get(
ClimaAnalysis.SimDir(diagnostics_folder_path),
short_name = first(first(compare_vars_biases_groups)),
)

diagnostics_times = ClimaAnalysis.times(first_var)
# Remove the first `spinup_months` months from the leaderboard
spinup_months = 6
# The monthly average output is at the end of the month, so this is safe
spinup_cutoff = spinup_months * 31 * 86400.0
if diagnostics_times[end] > spinup_cutoff
filter!(x -> x > spinup_cutoff, diagnostics_times)
end

output_dates = Dates.DateTime(first_var.attributes["start_date"]) .+ Dates.Second.(diagnostics_times)
@info "Working with dates:"
@info output_dates
## collect all days between cs.dates.date0 and cs.dates.date
MAM, JJA, SON, DJF = Leaderboard.split_by_season(output_dates)

for compare_vars_biases in compare_vars_biases_groups
ann_biases = compute_biases(compare_vars_biases, output_dates)
plot_biases(output_dates, ann_biases, first(compare_vars_biases) * "_total")

MAM_biases = compute_biases(compare_vars_biases, MAM)
plot_biases(MAM, MAM_biases, first(compare_vars_biases) * "_MAM")
JJA_biases = compute_biases(compare_vars_biases, JJA)
plot_biases(JJA, JJA_biases, first(compare_vars_biases) * "_JJA")
SON_biases = compute_biases(compare_vars_biases, SON)
plot_biases(SON, SON_biases, first(compare_vars_biases) * "_SON")
DJF_biases = compute_biases(compare_vars_biases, DJF)
plot_biases(DJF, DJF_biases, first(compare_vars_biases) * "_DJF")
end

compare_vars_rmses = ["pr", "rsut", "rlut"]

ann_biases = compute_biases(compare_vars_rmses, output_dates)
MAM_biases = compute_biases(compare_vars_rmses, MAM)
JJA_biases = compute_biases(compare_vars_rmses, JJA)
SON_biases = compute_biases(compare_vars_rmses, SON)
DJF_biases = compute_biases(compare_vars_rmses, DJF)

rmses = map(
(index) -> Leaderboard.RMSEs(;
model_name = "CliMA",
ANN = ann_biases[index],
DJF = DJF_biases[index],
MAM = MAM_biases[index],
JJA = JJA_biases[index],
SON = SON_biases[index],
),
1:length(compare_vars_rmses),
)

Leaderboard.plot_leaderboard(rmses; output_path = joinpath(leaderboard_base_path, "bias_leaderboard.png"))
end
end

## plot extra atmosphere diagnostics if specified
Expand Down
1 change: 0 additions & 1 deletion experiments/ClimaEarth/user_io/leaderboard.jl

This file was deleted.

18 changes: 0 additions & 18 deletions experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl

This file was deleted.

Loading

0 comments on commit fbd9a14

Please sign in to comment.