From fbd9a14eb1437e66fe4eb20ab2538d4d95af3254 Mon Sep 17 00:00:00 2001
From: Kevin Phan <98072684+ph-kev@users.noreply.github.com>
Date: Thu, 3 Oct 2024 16:02:21 -0700
Subject: [PATCH] Refactor leaderboard code

This commit refactors the leaderboard code to use the new features from
ClimaAnalysis and deletes the old leaderboard code along with its tests. It
also fixes an off-by-one-month issue in the handling of dates and seasons.
The code is moved to leaderboard.jl, and a line is added to the pipeline to
run it. One significant difference is that the leaderboard now plots the best
and worst single models using only the annual error rather than the error
averaged over annual and seasonal data.
---
 .buildkite/pipeline.yml                       |   4 +-
 experiments/ClimaEarth/Project.toml           |   2 +-
 experiments/ClimaEarth/leaderboard.jl         | 228 ++++++++++++
 experiments/ClimaEarth/run_amip.jl            | 108 ------
 experiments/ClimaEarth/user_io/leaderboard.jl |   1 -
 .../user_io/leaderboard/Leaderboard.jl        |  18 -
 .../user_io/leaderboard/cmip_rmse.jl          | 135 -------
 .../user_io/leaderboard/compare_with_obs.jl   | 329 ------------------
 .../user_io/leaderboard/data_sources.jl       | 127 -------
 .../ClimaEarth/user_io/leaderboard/utils.jl   | 240 -------------
 test/experiment_tests/leaderboard.jl          |  90 -----
 test/runtests.jl                              |   5 +-
 12 files changed, 233 insertions(+), 1054 deletions(-)
 create mode 100644 experiments/ClimaEarth/leaderboard.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/data_sources.jl
 delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/utils.jl
 delete mode 100644 test/experiment_tests/leaderboard.jl

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 4ebd9c31a6..abfffaed8c 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -474,7 +474,9 @@ steps:
 
       - label: "GPU AMIP target: topography and diagnostic EDMF"
         key: "gpu_amip_target_topo_diagedmf_shortrun"
-        command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_target_topo_diagedmf_shortrun.yml --job_id gpu_amip_target_topo_diagedmf_shortrun"
+        command:
+          - "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_target_topo_diagedmf_shortrun.yml --job_id gpu_amip_target_topo_diagedmf_shortrun"
+          - "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/leaderboard.jl experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun/clima_atmos/output_active experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun_artifacts"
         artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_target_topo_diagedmf_shortrun_artifacts/*"
         env:
           CLIMACOMMS_DEVICE: "CUDA"
diff --git a/experiments/ClimaEarth/Project.toml b/experiments/ClimaEarth/Project.toml
index 0d57a91d15..47448e800c 100644
--- a/experiments/ClimaEarth/Project.toml
+++ b/experiments/ClimaEarth/Project.toml
@@ -34,7 +34,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 [compat]
 ArgParse = "1.1"
 ArtifactWrappers = "0.2"
-ClimaAnalysis = "0.5.4"
+ClimaAnalysis = "0.5.10"
 ClimaAtmos = "0.27"
 ClimaCorePlots = "0.2"
 ClimaDiagnostics = "0.2"
diff --git a/experiments/ClimaEarth/leaderboard.jl b/experiments/ClimaEarth/leaderboard.jl
new file mode 100644
index 0000000000..41b0a319c1
--- /dev/null
+++ b/experiments/ClimaEarth/leaderboard.jl
@@ -0,0 +1,228 @@
+import ClimaAnalysis
+import ClimaUtilities.ClimaArtifacts: @clima_artifact
+import GeoMakie
+import CairoMakie
+import Dates
+
+@info "Error against observations"
+
+# Tuple of short names for loading simulation and observational data
+sim_obs_short_names_no_pr = [
+    ("rsdt", "solar_mon"),
+    ("rsut", "toa_sw_all_mon"),
+    ("rlut", "toa_lw_all_mon"),
+    ("rsutcs", "toa_sw_clr_t_mon"),
+    ("rlutcs", "toa_lw_clr_t_mon"),
+    ("rsds", "sfc_sw_down_all_mon"),
+    ("rsus", "sfc_sw_up_all_mon"),
+    ("rlds", "sfc_lw_down_all_mon"),
+    ("rlus", "sfc_lw_up_all_mon"),
+    ("rsdscs", "sfc_sw_down_clr_t_mon"),
+    ("rsuscs", "sfc_sw_up_clr_t_mon"),
+    ("rldscs", "sfc_lw_down_clr_t_mon"),
+]
+
+compare_vars_biases_plot_extrema = Dict(
+    "pr" => (-5.0, 5.0),
+    "rsdt" => (-2.0, 2.0),
+    "rsut" => (-50.0, 50.0),
+    "rlut" => (-50.0, 50.0),
+    "rsutcs" => (-20.0, 20.0),
+    "rlutcs" => (-20.0, 20.0),
+    "rsds" => (-50.0, 50.0),
+    "rsus" => (-10.0, 10.0),
+    "rlds" => (-50.0, 50.0),
+    "rlus" => (-50.0, 50.0),
+    "rsdscs" => (-10.0, 10.0),
+    "rsuscs" => (-10.0, 10.0),
+    "rldscs" => (-20.0, 20.0),
+)
+
+
+if length(ARGS) < 2
+    error("Usage: leaderboard.jl <leaderboard_base_path> <diagnostics_folder_path>")
+end
+
+# Path to saved leaderboards
+leaderboard_base_path = ARGS[begin]
+
+# Path to simulation data
+diagnostics_folder_path = ARGS[begin + 1]
+
+# Dict for loading in simulation data
+sim_var_dict = Dict{String, Any}(
+    "pr" =>
+        () -> begin
+            sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr")
+            sim_var =
+                ClimaAnalysis.convert_units(sim_var, "mm/day", conversion_function = x -> x .* Float32(-86400))
+            sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
+            return sim_var
+        end,
+)
+
+# Loop to load the rest of the simulation data
+for (short_name, _) in sim_obs_short_names_no_pr
+    sim_var_dict[short_name] =
+        () -> begin
+            sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name)
+            sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
+            return sim_var
+        end
+end
+
+# Dict for loading observational data
+obs_var_dict = Dict{String, Any}(
+    "pr" =>
+        (start_date) -> begin
+            obs_var = ClimaAnalysis.OutputVar(
+                joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"),
+                "precip",
+                new_start_date = start_date,
+                shift_by = Dates.firstdayofmonth,
+            )
+            return obs_var
+        end,
+)
+
+# Loop to load the rest of the observational data
+for (sim_name, obs_name) in sim_obs_short_names_no_pr
+    obs_var_dict[sim_name] =
+        (start_date) -> begin
+            obs_var = ClimaAnalysis.OutputVar(
+                joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"),
+                obs_name,
+                new_start_date = start_date,
+                shift_by = Dates.firstdayofmonth,
+            )
+            # Convert from W m-2 to W m^-2
+            ClimaAnalysis.units(obs_var) == "W m-2" ?
obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") : + error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units") + return obs_var + end +end + +# Set up dict for storing simulation and observational data after processing +sim_obs_comparsion_dict = Dict() +seasons = ["ANN", "MAM", "JJA", "SON", "DJF"] + +# Print dates for debugging +pr_var = sim_var_dict["pr"]() # it shouldn't matter what short name we use +output_dates = Dates.DateTime(pr_var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(pr_var)) +@info "Working with dates:" +@info output_dates + +for short_name in keys(sim_var_dict) + # Simulation data + sim_var = sim_var_dict[short_name]() + + # Observational data + obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"]) + + # Remove first spin_up_months from simulation + spin_up_months = 6 + spinup_cutoff = spin_up_months * 31 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && + (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)) + + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + obs_var_seasons = ClimaAnalysis.split_by_season(obs_var) + sim_var_seasons = ClimaAnalysis.split_by_season(sim_var) + + # Add annual to start of seasons + obs_var_seasons = [obs_var, obs_var_seasons...] + sim_var_seasons = [sim_var, sim_var_seasons...] + + # Take time average + obs_var_seasons = obs_var_seasons .|> ClimaAnalysis.average_time + sim_var_seasons = sim_var_seasons .|> ClimaAnalysis.average_time + + # Add "mean " for plotting the title + for sim_var in sim_var_seasons + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + end + + # Save observation and simulation data + sim_obs_comparsion_dict[short_name] = Dict( + season => (sim_var_s, obs_var_s) for + (season, sim_var_s, obs_var_s) in zip(seasons, sim_var_seasons, obs_var_seasons) + ) +end + +compare_vars_biases_groups = [ + ["pr", "rsdt", "rsut", "rlut"], + ["rsds", "rsus", "rlds", "rlus"], + ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], +] + +# Plot bias plots +for season in seasons + for compare_vars_biases in compare_vars_biases_groups + fig_bias = CairoMakie.Figure(; size = (600, 300 * length(compare_vars_biases))) + for (loc, short_name) in enumerate(compare_vars_biases) + ClimaAnalysis.Visualize.plot_bias_on_globe!( + fig_bias, + sim_obs_comparsion_dict[short_name][season]..., + cmap_extrema = compare_vars_biases_plot_extrema[short_name], + p_loc = (loc, 1), + ) + end + # Do if and else statement for naming files appropriately + if season != "ANN" + CairoMakie.save(joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_$season.png"), fig_bias) + else + CairoMakie.save(joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_total.png"), fig_bias) + end + end +end + +# Plot leaderboard +# Load data into RMSEVariables +rmse_var_pr = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), + "pr", + units = "mm / day", +) +rmse_var_rsut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"), + "rsut", + units = "W m^-2", +) +rmse_var_rlut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"), + "rlut", + units = "W m^-2", +) + +# Add models and units for CliMA +rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA") +ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day") + +rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, 
"CliMA") +ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2") + +rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA") +ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") + +# Add RMSE for the CliMA model and for each season +for season in seasons + rmse_var_pr["CliMA", season] = ClimaAnalysis.global_rmse(sim_obs_comparsion_dict["pr"][season]...) + rmse_var_rsut["CliMA", season] = ClimaAnalysis.global_rmse(sim_obs_comparsion_dict["rsut"][season]...) + rmse_var_rlut["CliMA", season] = ClimaAnalysis.global_rmse(sim_obs_comparsion_dict["rlut"][season]...) +end + +# Plot box plots +rmse_vars = (rmse_var_pr, rmse_var_rsut, rmse_var_rlut) +fig_leaderboard = CairoMakie.Figure(; size = (800, 300 * 3 + 400), fontsize = 20) +for (loc, rmse_var) in enumerate(rmse_vars) + ClimaAnalysis.Visualize.plot_boxplot!( + fig_leaderboard, + rmse_var, + ploc = (loc, 1), + best_and_worst_category_name = "ANN", + ) +end + +# Plot leaderboard +ClimaAnalysis.Visualize.plot_leaderboard!(fig_leaderboard, rmse_vars..., best_category_name = "ANN", ploc = (4, 1)) +CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard) diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 67b106ddc7..f3c2cba560 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -920,114 +920,6 @@ if ClimaComms.iamroot(comms_ctx) files_root = ".monthly", output_dir = dir_paths.artifacts, ) - - ## Compare against observations - if t_end > 84600 && config_dict["output_default_diagnostics"] - @info "Error against observations" - include("user_io/leaderboard.jl") - ClimaAnalysis = Leaderboard.ClimaAnalysis - - compare_vars_biases_plot_extrema = Dict( - "pr" => (-5.0, 5.0), - "rsdt" => (-2.0, 2.0), - "rsut" => (-50.0, 50.0), - "rlut" => (-50.0, 50.0), - "rsutcs" => (-20.0, 20.0), - "rlutcs" => (-20.0, 20.0), - "rsds" => (-50.0, 50.0), - "rsus" => (-10.0, 10.0), - "rlds" => (-50.0, 50.0), - "rlus" => (-50.0, 50.0), - "rsdscs" => (-10.0, 10.0), - "rsuscs" => (-10.0, 10.0), - "rldscs" => (-20.0, 20.0), - ) - - diagnostics_folder_path = atmos_sim.integrator.p.output_dir - leaderboard_base_path = dir_paths.artifacts - - compare_vars_biases_groups = [ - ["pr", "rsdt", "rsut", "rlut"], - ["rsds", "rsus", "rlds", "rlus"], - ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], - ] - - function compute_biases(compare_vars_biases, dates) - if isempty(dates) - return map(x -> 0.0, compare_vars_biases) - else - return Leaderboard.compute_biases( - diagnostics_folder_path, - compare_vars_biases, - dates, - cmap_extrema = compare_vars_biases_plot_extrema, - ) - end - end - - function plot_biases(dates, biases, output_name) - isempty(dates) && return nothing - - output_path = joinpath(leaderboard_base_path, "bias_$(output_name).png") - Leaderboard.plot_biases(biases; output_path) - end - - first_var = get( - ClimaAnalysis.SimDir(diagnostics_folder_path), - short_name = first(first(compare_vars_biases_groups)), - ) - - diagnostics_times = ClimaAnalysis.times(first_var) - # Remove the first `spinup_months` months from the leaderboard - spinup_months = 6 - # The monthly average output is at the end of the month, so this is safe - spinup_cutoff = spinup_months * 31 * 86400.0 - if diagnostics_times[end] > spinup_cutoff - filter!(x -> x > spinup_cutoff, diagnostics_times) - end - - output_dates = Dates.DateTime(first_var.attributes["start_date"]) .+ Dates.Second.(diagnostics_times) - @info "Working with dates:" - @info output_dates - ## collect all 
days between cs.dates.date0 and cs.dates.date - MAM, JJA, SON, DJF = Leaderboard.split_by_season(output_dates) - - for compare_vars_biases in compare_vars_biases_groups - ann_biases = compute_biases(compare_vars_biases, output_dates) - plot_biases(output_dates, ann_biases, first(compare_vars_biases) * "_total") - - MAM_biases = compute_biases(compare_vars_biases, MAM) - plot_biases(MAM, MAM_biases, first(compare_vars_biases) * "_MAM") - JJA_biases = compute_biases(compare_vars_biases, JJA) - plot_biases(JJA, JJA_biases, first(compare_vars_biases) * "_JJA") - SON_biases = compute_biases(compare_vars_biases, SON) - plot_biases(SON, SON_biases, first(compare_vars_biases) * "_SON") - DJF_biases = compute_biases(compare_vars_biases, DJF) - plot_biases(DJF, DJF_biases, first(compare_vars_biases) * "_DJF") - end - - compare_vars_rmses = ["pr", "rsut", "rlut"] - - ann_biases = compute_biases(compare_vars_rmses, output_dates) - MAM_biases = compute_biases(compare_vars_rmses, MAM) - JJA_biases = compute_biases(compare_vars_rmses, JJA) - SON_biases = compute_biases(compare_vars_rmses, SON) - DJF_biases = compute_biases(compare_vars_rmses, DJF) - - rmses = map( - (index) -> Leaderboard.RMSEs(; - model_name = "CliMA", - ANN = ann_biases[index], - DJF = DJF_biases[index], - MAM = MAM_biases[index], - JJA = JJA_biases[index], - SON = SON_biases[index], - ), - 1:length(compare_vars_rmses), - ) - - Leaderboard.plot_leaderboard(rmses; output_path = joinpath(leaderboard_base_path, "bias_leaderboard.png")) - end end ## plot extra atmosphere diagnostics if specified diff --git a/experiments/ClimaEarth/user_io/leaderboard.jl b/experiments/ClimaEarth/user_io/leaderboard.jl deleted file mode 100644 index e3af41d01f..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard.jl +++ /dev/null @@ -1 +0,0 @@ -include("leaderboard/Leaderboard.jl") diff --git a/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl b/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl deleted file mode 100644 index 6dc73c9f3e..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl +++ /dev/null @@ -1,18 +0,0 @@ -module Leaderboard - -import ClimaComms -import ClimaAnalysis -import Dates -import NCDatasets -import Interpolations -import CairoMakie -import GeoMakie -import ClimaCoupler -import Statistics: mean -import ClimaUtilities.ClimaArtifacts: @clima_artifact - -include("data_sources.jl") -include("utils.jl") -include("compare_with_obs.jl") - -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl b/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl deleted file mode 100644 index 6f107bbf68..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl +++ /dev/null @@ -1,135 +0,0 @@ -import Statistics: median, quantile - -const RMSE_FILE_PATHS = Dict() - -RMSE_FILE_PATHS["pr"] = joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv") -RMSE_FILE_PATHS["rsut"] = joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv") -RMSE_FILE_PATHS["rlut"] = joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv") - -short_names = ["pr", "rsut", "rlut"] -for short_name in short_names - open(RMSE_FILE_PATHS[short_name], "r") do io - # Skip the header - header = readline(io) - - # Process each line - for line in eachline(io) - # Split the line by comma - fields = split(line, ',') - model_name = fields[1] - DJF, MAM, JJA, SON, ANN = map(x -> parse(Float64, x), fields[2:end]) - - push!(OTHER_MODELS_RMSEs[short_name], 
RMSEs(; model_name, DJF, MAM, JJA, SON, ANN)) - end - end -end - -""" - best_single_model(RMSEs) - -Return the one model that has the overall smallest error. -""" -function best_single_model(RMSEs) - _, index = findmin(r -> abs.(values(r)), RMSEs) - return RMSEs[index] -end - -""" - worst_single_model(RMSEs) - -Return the one model that has the overall largest error. -""" -function worst_single_model(RMSEs) - _, index = findmax(r -> abs.(values(r)), RMSEs) - return RMSEs[index] -end - -""" - RMSE_stats(RMSEs) - -RMSEs is the dictionary OTHER_MODELS_RMSEs. - -Return: -- best single model -- worst single model -- "model" with all the medians -- "model" with all the best values -- "model" with all the worst values -""" -function RMSE_stats(dict_vecRMSEs) - stats = Dict() - # cumulative_error maps model_names with the total RMSE across metrics normalized by median(RMSE) - cumulative_error = Dict() - for (key, vecRMSEs) in dict_vecRMSEs - # Collect into vectors that we can process independently - all_values = stack(values.(vecRMSEs)) - ANN, DJF, JJA, MAM, SON = ntuple(i -> all_values[i, :], 5) - - median_model = RMSEs(; - model_name = "Median", - ANN = median(ANN), - DJF = median(DJF), - JJA = median(JJA), - MAM = median(MAM), - SON = median(SON), - ) - - worst_model = RMSEs(; - model_name = "Worst", - ANN = maximum(abs.(ANN)), - DJF = maximum(abs.(DJF)), - JJA = maximum(abs.(JJA)), - MAM = maximum(abs.(MAM)), - SON = maximum(abs.(SON)), - ) - - best_model = RMSEs(; - model_name = "Best", - ANN = minimum(abs.(ANN)), - DJF = minimum(abs.(DJF)), - JJA = minimum(abs.(JJA)), - MAM = minimum(abs.(MAM)), - SON = minimum(abs.(SON)), - ) - - quantile25 = RMSEs(; - model_name = "Quantile 0.25", - ANN = quantile(ANN, 0.25), - DJF = quantile(DJF, 0.25), - JJA = quantile(JJA, 0.25), - MAM = quantile(MAM, 0.25), - SON = quantile(SON, 0.25), - ) - - quantile75 = RMSEs(; - model_name = "Quantile 0.75", - ANN = quantile(ANN, 0.75), - DJF = quantile(DJF, 0.75), - JJA = quantile(JJA, 0.75), - MAM = quantile(MAM, 0.75), - SON = quantile(SON, 0.75), - ) - - for rmse in vecRMSEs - haskey(cumulative_error, cumulative_error) || (cumulative_error[rmse.model_name] = 0.0) - cumulative_error[rmse.model_name] += sum(values(rmse) ./ values(median_model)) - end - - stats[key] = (; - best_single_model = best_single_model(vecRMSEs), - worst_single_model = worst_single_model(vecRMSEs), - median_model, - worst_model, - best_model, - quantile25, - quantile75, - ) - end - - _, absolute_best_model = findmin(cumulative_error) - _, absolute_worst_model = findmax(cumulative_error) - - return (; stats, absolute_best_model, absolute_worst_model) -end - -const COMPARISON_RMSEs_STATS = RMSE_stats(OTHER_MODELS_RMSEs) diff --git a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl b/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl deleted file mode 100644 index f93c4ba68d..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl +++ /dev/null @@ -1,329 +0,0 @@ -import CairoMakie - -const OBS_DS = Dict() -const SIM_DS_KWARGS = Dict() -const OTHER_MODELS_RMSEs = Dict() - -function preprocess_pr_fn(data) - # -1 kg/m/s2 -> 1 mm/day - return data .* Float32(-86400) -end - -function replace_nan(x; v = 0.0) - return map(x -> isnan(x) ? 
zero(x) : x, v) -end - -Base.@kwdef struct RMSEs - model_name::String - ANN::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - DJF::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - MAM::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - JJA::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - SON::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 -end - -function Base.values(r::RMSEs) - val_or_rmse(v::Real) = v - val_or_rmse(v::ClimaAnalysis.OutputVar) = v.attributes["rmse"] - - return val_or_rmse.([r.ANN, r.DJF, r.MAM, r.JJA, r.SON]) -end - -OBS_DS["pr"] = ObsDataSource(; - path = joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"), - var_name = "precip", -) -SIM_DS_KWARGS["pr"] = (; preprocess_data_fn = preprocess_pr_fn, new_units = "mm / day") -OTHER_MODELS_RMSEs["pr"] = [] - -OBS_DS["rsut"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_sw_all_mon", -) -SIM_DS_KWARGS["rsut"] = (;) -OTHER_MODELS_RMSEs["rsut"] = [] - -OBS_DS["rlut"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_lw_all_mon", -) -SIM_DS_KWARGS["rlut"] = (;) -OTHER_MODELS_RMSEs["rlut"] = [] - -OBS_DS["rsdt"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "solar_mon", -) -SIM_DS_KWARGS["rsdt"] = (;) - -OBS_DS["rsutcs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_sw_clr_t_mon", -) -SIM_DS_KWARGS["rsutcs"] = (;) - -OBS_DS["rlutcs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_lw_clr_t_mon", -) -SIM_DS_KWARGS["rlutcs"] = (;) - -OBS_DS["rsus"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_up_all_mon", -) -SIM_DS_KWARGS["rsus"] = (;) - -OBS_DS["rsds"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_down_all_mon", -) -SIM_DS_KWARGS["rsds"] = (;) - -OBS_DS["rlus"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_up_all_mon", -) -SIM_DS_KWARGS["rlus"] = (;) - -OBS_DS["rlds"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_down_all_mon", -) -SIM_DS_KWARGS["rlds"] = (;) - -OBS_DS["rsdscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_down_clr_t_mon", -) -SIM_DS_KWARGS["rsdscs"] = (;) - -OBS_DS["rsuscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_up_clr_t_mon", -) -SIM_DS_KWARGS["rsuscs"] = (;) - -OBS_DS["rldscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_down_clr_t_mon", -) -SIM_DS_KWARGS["rldscs"] = (;) - -include("cmip_rmse.jl") - -function bias(output_dir::AbstractString, short_name::AbstractString, target_dates::AbstractArray{<:Dates.DateTime}) - obs = OBS_DS[short_name] - sim = SimDataSource(; path = output_dir, short_name, SIM_DS_KWARGS[short_name]...) 
- return bias(obs, sim, target_dates) -end - -function compute_biases(output_dir, short_names, target_dates::AbstractArray{<:Dates.DateTime}; cmap_extrema = Dict()) - return map(short_names) do name - bias_outvar = bias(output_dir, name, target_dates) - # The attribute is used in plot_bias to fix the colormap - haskey(cmap_extrema, name) && (bias_outvar.attributes["cmap_extrema"] = cmap_extrema[name]) - return bias_outvar - end -end - - -# colors are sampled in [0,1], so define a function that linearly transforms x ∈ [lo, hi] to [0, 1]. -to_unitrange(x::Number, lo::Number, hi::Number) = (x - lo) / (hi - lo) - -""" - constrained_cmap(cols, lo, hi; [categorical=false], [rev=false], [mid=0]) - constrained_cmap(lo, hi; [categorical=false], [rev=false], [mid=0]) - -Constrain a colormap to a given range. - -Given a colormap implicitly defined in `± maximum(abs, (lo, hi))`, constrain it to the range [lo, hi]. -This is useful to ensure that a colormap which is desired to diverge symmetrically around zero maps -the same color intensity to the same magnitude. - -The second form is a convenience function that uses the `:redsblues` colormap. - -# Arguments -- `cols`: a vector of colors, or a ColorScheme -- `lo`: lower bound of the range -- `hi`: upper bound of the range - -# Keyword Arguments -- `mid`: midpoint of the range # TODO: test `mid` better -- `categorical`: flag for whether returned colormap should be categorical or continous -- `rev`: flag for whether to reverse the colormap - -# Returns -- `cmap::Makie.ColorGradient`: a colormap -""" -function constrained_cmap(cols::Vector, lo, hi; mid = 0, categorical = false, rev = false) - constrained_cmap(CairoMakie.Makie.ColorScheme(cols), lo, hi; mid, categorical, rev) -end -function constrained_cmap(cols::CairoMakie.Makie.ColorScheme, lo, hi; mid = 0, categorical = false, rev = false) - rev && (cols = reverse(cols)) # reverse colorscheme if requested, don't reverse below in `cgrad`. - absmax = maximum(abs, (lo, hi) .- mid) - # map lo, hi ∈ [-absmax, absmax] onto [0,1] to sample their corresponding colors - lo_m, hi_m = to_unitrange.((lo, hi) .- mid, -absmax, absmax) - # values on [0,1] where each color in cols is defined - colsvals = range(0, 1; length = length(cols)) - # filter colsvals, keep only values in [lo_m, hi_m] + the endpoints lo_m and hi_m. - filter_colsvals = filter(x -> lo_m <= x <= hi_m, unique([lo_m; colsvals; hi_m])) - # select colors in filtered range; interpolate new low and hi colors. 
- newcols = CairoMakie.Makie.get(cols, filter_colsvals) - # values on [0,1] where the new colors are defined - new_colsvals = to_unitrange.(filter_colsvals, lo_m, hi_m) - cmap = CairoMakie.cgrad(newcols, new_colsvals; categorical, rev = false) - return cmap -end - -function plot_biases(biases; output_path) - fig = CairoMakie.Figure(; size = (600, 300 * length(biases))) - loc = 1 - - for bias_var in biases - min_level, max_level = get(bias_var.attributes, "cmap_extrema", extrema(bias_var.data)) - - # Make sure that 0 is at the center - cmap = constrained_cmap(CairoMakie.cgrad(:vik).colors, min_level, max_level; categorical = true) - nlevels = 11 - # Offset so that it covers 0 - levels = collect(range(min_level, max_level, length = nlevels)) - offset = levels[argmin(abs.(levels))] - levels = levels .- offset - ticklabels = map(x -> string(round(x; digits = 0)), levels) - ticks = (levels, ticklabels) - - more_kwargs = Dict( - :plot => Dict(:colormap => cmap, :levels => levels, :extendhigh => :auto, :extendlow => :auto), - :cb => Dict(:ticks => ticks), - ) - - ClimaAnalysis.Visualize.contour2D_on_globe!(fig, bias_var; p_loc = (loc, 1), more_kwargs) - loc = loc + 1 - end - CairoMakie.save(output_path, fig) -end - -function plot_leaderboard(rmses; output_path) - fig = CairoMakie.Figure(; size = (800, 300 * length(rmses) + 400), fontsize = 20) - loc = 1 - - NUM_BOXES = 4 + 1 # 4 seasons and 1 annual - NUM_MODELS = 2 # CliMA vs best - - num_variables = length(rmses) - - var_names = map(r -> r.ANN.attributes["var_short_name"], rmses) - - # The square plot is at the very bottom - loc_squares = length(rmses) + 1 - ax_squares = CairoMakie.Axis( - fig[loc_squares, 1], - yticks = (1:num_variables, reverse(var_names)), - xticks = ([3, NUM_BOXES + 3], ["CliMA", "Best model"]), - aspect = NUM_BOXES * NUM_MODELS, - ) - ax_squares2 = CairoMakie.Axis( - fig[loc_squares, 1], - xaxisposition = :top, - xticks = (0.5:4.5, ["Ann", "DJF", "MAM", "JJA", "SON"]), - aspect = NUM_BOXES * NUM_MODELS, - ) - CairoMakie.hidespines!(ax_squares2) - CairoMakie.hideydecorations!(ax_squares2) - - # Preallocate the matrix for the squares, each row is (4 seasons + annual) x - # models compared, and there is one row per variable - squares = zeros(NUM_BOXES * NUM_MODELS, num_variables) - - (; absolute_best_model, absolute_worst_model) = COMPARISON_RMSEs_STATS - - for (var_num, rmse) in enumerate(rmses) - short_name = rmse.ANN.attributes["var_short_name"] - units = rmse.ANN.attributes["units"] - ax = CairoMakie.Axis( - fig[loc, 1], - ylabel = "$short_name [$units]", - xticks = (1:5, ["Ann", "DJF", "MAM", "JJA", "SON"]), - title = "Global RMSE $short_name [$units]", - ) - - # Against other models - - (; median_model) = COMPARISON_RMSEs_STATS.stats[short_name] - - best_single_model = first(filter(x -> x.model_name == absolute_best_model, OTHER_MODELS_RMSEs[short_name])) - worst_single_model = first(filter(x -> x.model_name == absolute_worst_model, OTHER_MODELS_RMSEs[short_name])) - - squares[begin:NUM_BOXES, end - var_num + 1] .= values(rmse) ./ values(median_model) - squares[(NUM_BOXES + 1):end, end - var_num + 1] .= values(best_single_model) ./ values(median_model) - - CairoMakie.scatter!( - ax, - 1:5, - values(median_model), - label = median_model.model_name, - color = :black, - marker = :hline, - markersize = 10, - visible = false, - ) - - categories = vcat(map(_ -> collect(1:5), 1:length(OTHER_MODELS_RMSEs[short_name]))...) 
- - CairoMakie.boxplot!( - ax, - categories, - vcat(values.(OTHER_MODELS_RMSEs[short_name])...); - whiskerwidth = 1, - width = 0.35, - mediancolor = :black, - color = :gray, - whiskerlinewidth = 1, - ) - - CairoMakie.scatter!(ax, 1:5, values(best_single_model), label = absolute_best_model) - CairoMakie.scatter!(ax, 1:5, values(worst_single_model), label = absolute_worst_model) - - # If we want to plot other models - # for model in OTHER_MODELS_RMSEs[short_name] - # CairoMakie.scatter!(ax, 1:5, values(model), marker = :hline) - # end - - CairoMakie.scatter!( - ax, - 1:5, - values(rmse), - label = rmse.model_name, - marker = :star5, - markersize = 20, - color = :green, - ) - - # Add a fake extra point to center the legend a little better - CairoMakie.scatter!(ax, [6.5], [0.1], markersize = 0.01) - CairoMakie.axislegend() - loc = loc + 1 - end - - colormap = CairoMakie.Reverse(:RdYlGn) - - # Now, the square plot - CairoMakie.heatmap!( - ax_squares, - squares, - colormap = colormap, - # Trick to exclude the zeros - lowclip = :white, - colorrange = (1e-10, maximum(squares)), - ) - CairoMakie.vlines!(ax_squares2, NUM_BOXES, color = :black, linewidth = 3.0) - CairoMakie.Colorbar( - fig[loc_squares, 2], - limits = extrema(squares), - label = "RMSE/median(RMSE)", - colormap = colormap, - ) - - CairoMakie.save(output_path, fig) -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl b/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl deleted file mode 100644 index ce126a6df9..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl +++ /dev/null @@ -1,127 +0,0 @@ -""" - A struct to describe some observation data that we want to compare against. -""" -struct ObsDataSource - - # NOTE: This struct is not concretely typed, but we don't care about performance here. - # We only care about beauty. - - """Path of the NetCDF file""" - path::AbstractString - - """Name of the variable of interest in the NetCDF file""" - var_name::AbstractString - - """Name of the time dimension in the NetCDF file""" - time_name::AbstractString - """Name of the longitude dimension in the NetCDF file""" - lon_name::AbstractString - """Name of the latitude dimension in the NetCDF file""" - lat_name::AbstractString - - """Function that has to be applied to the data to convert it to different units""" - preprocess_data_fn::Function - - """The NCDataset associated to the file""" - ncdataset::NCDatasets.NCDataset - - """Shift dates so that they are at the end of month?""" - shift_to_end_of_month::Bool -end - -function ObsDataSource(; - path, - var_name, - time_name = "time", - lon_name = "lon", - lat_name = "lat", - preprocess_data_fn = identity, - shift_to_end_of_month = true, -) - - ncdataset = NCDatasets.NCDataset(path) - - return ObsDataSource( - path, - var_name, - time_name, - lon_name, - lat_name, - preprocess_data_fn, - ncdataset, - shift_to_end_of_month, - ) -end - -""" - Base.close(ds::ObsDataSource) - -Close the file associated to `ds`. -""" -function Base.close(ds::ObsDataSource) - NCDatasets.close(ds.ncdataset) -end - -""" - A struct to describe some simulation data. -""" -struct SimDataSource - - # This struct is not concretely typed, but we don't care about performance here. We only - # care about beauty. 
- - """Path of the simulation output""" - path::AbstractString - - """Short name of the variable of interest""" - short_name::AbstractString - - """Reduction to consider""" - reduction::AbstractString - - """Period of the reduction (e.g., 1d, 30d)""" - period::AbstractString - - """ClimaAnalysis OutputVar""" - var::ClimaAnalysis.OutputVar - - """Simulation longitudes and latitudes""" - lonlat::Tuple{AbstractArray, AbstractArray} - - """Function that has to be applied to the data to convert it to different units""" - preprocess_data_fn::Function - - # TODO: This should be handled by ClimaAnalysis - """preprocess_data_fn is typically used to change units, so we have to tell ClimaAnalysis what the new - units are.""" - new_units::Union{Nothing, AbstractString} -end - -function SimDataSource(; - path, - short_name, - reduction = "average", - period = "10d", - preprocess_data_fn = identity, - new_units = nothing, -) - - sim = ClimaAnalysis.SimDir(path) - # TODO: Add period, for the time-being, we just pick up what's there - var = get(sim; short_name, reduction) - - lonlat = (var.dims["lon"], var.dims["lat"]) - - return SimDataSource(path, short_name, reduction, period, var, lonlat, preprocess_data_fn, new_units) -end - -""" - data_at_date(sim_ds::SimDataSource, date::Dates.DateTime) - -Return the simulation data at the given date. -""" -function data_at_date(sim_ds::SimDataSource, date::Dates.DateTime) - start_date = Dates.DateTime(sim_ds.var.attributes["start_date"]) - time_diff_seconds = (date - start_date) / Dates.Second(1) - return sim_ds.preprocess_data_fn(ClimaAnalysis.slice(sim_ds.var, time = time_diff_seconds).data) -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/utils.jl b/experiments/ClimaEarth/user_io/leaderboard/utils.jl deleted file mode 100644 index bf6472f75f..0000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/utils.jl +++ /dev/null @@ -1,240 +0,0 @@ -import OrderedCollections: OrderedDict - -""" - isequispaced(arr::Vector) - -Return whether the array is equispaced or not. -""" -function isequispaced(arr::Vector) - return all(diff(arr) .≈ arr[begin + 1] - arr[begin]) -end - -""" - resample( - data::AbstractArray, - src_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Resample 2D `data` from `src_lonlat` to `dest_lonlat`. - -Note, "flat" boundary conditions are imposed for the outer edges. This should make sense for -most data sources (that are defined all over the globe but do not necessarily have points at -the poles). Be sure to check that it makes sense for your data. - -The resampling performed here is a 0th-order constant resampling. -""" - -function resample( - data::AbstractArray, - src_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, -) - - # Interpolations.jl wants ranges, so we have to make ranges out of the given src_lonlat - # - # NOTE: We are assuming that lonlat are equispaced. Check this! - vec_to_range(v) = range(v[begin], v[end], length = length(v)) - src_lonlat_ranges = vec_to_range.(src_lonlat) - - itp = Interpolations.linear_interpolation( - src_lonlat_ranges, - data, - extrapolation_bc = (Interpolations.Periodic(), Interpolations.Flat()), - ) - - dest_lon, dest_lat = dest_lonlat - - return [itp(lon, lat) for lon in dest_lon, lat in dest_lat] -end - -""" - integration_weights(lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - -Compute the integration weights for a first-order spherical integration. 
-""" -function integration_weights(lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - - # We are also assuming that they are in degrees - abs(maximum(lon)) <= π && error("longitude should be in degrees") - abs(maximum(lat)) <= π / 2 && error("latitude should be in degrees") - - isequispaced(lon) || error("Longitude is not equispaced") - isequispaced(lat) || error("Latitude is not equispaced") - - dlon_rad = deg2rad(abs(lon[begin + 1] - lon[begin])) - dlat_rad = deg2rad(abs(lat[begin + 1] - lat[begin])) - - return [cosd(lat1) * dlon_rad * dlat_rad for _ in lon, lat1 in lat] -end - -""" - integrate_on_sphere( - data::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Integrate `data` on a sphere with a first-order scheme. `data` has to be discretized on -`lonlat`. -""" -function integrate_on_sphere(data::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_data = size(data) - exp_size = (length(lon), length(lat)) - size_data == exp_size || error("Inconsistent dimensions $size_data != $exp_size") - return sum(data .* integration_weights(lonlat)) ./ sum(integration_weights(lonlat)) -end - -""" - mse( - data1::AbstractArray, - data2::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Compute the 2D map of the mean-square error. -""" -function mse(data1::AbstractArray, data2::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_data1 = size(data1) - size_data2 = size(data2) - exp_size = (length(lon), length(lat)) - size_data1 == exp_size || error("Inconsistent dimensions $size_data1 != $exp_size") - size_data1 == size_data2 || error("Inconsistent dimensions $size_data1 != $size_data2") - - return (data1 .- data2) .^ 2 -end - -""" - bias( - sim::AbstractArray, - obs::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Compute the 2D map of the bias (simulated - observed). -""" -function bias(sim::AbstractArray, obs::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_sim = size(sim) - size_obs = size(obs) - exp_size = (length(lon), length(lat)) - size_sim == exp_size || error("Inconsistent dimensions $size_sim != $exp_size") - size_sim == size_obs || error("Inconsistent dimensions $size_obs != $size_data2") - - return sim .- obs -end - -""" - bias( - obs_ds::ObsDataSource, - sim_ds::SimDataSource, - target_dates::AbstractArray{<: Dates.DateTime}, - ) - -Compute the 2D map of the bias (simulated - observed) for data averaged over the given dates. - -The return value is a `ClimaAnalysis.OutputVar` with the computed `rmse` and `bias` among -its attributes. -""" -function bias(obs_ds::ObsDataSource, sim_ds::SimDataSource, target_dates::AbstractArray{<:Dates.DateTime}) - lonlat = sim_ds.lonlat - simulated_data = map(d -> data_at_date(sim_ds, d), target_dates) |> mean - observational_data = map(d -> find_and_resample(obs_ds, d, lonlat), target_dates) |> mean - - bias_arr = bias(simulated_data, observational_data, lonlat) - mse_arr = mse(simulated_data, observational_data, lonlat) - - short_name = ClimaAnalysis.short_name(sim_ds.var) - - bias_dims = OrderedDict("lon" => lonlat[1], "lat" => lonlat[2]) - bias_dim_attribs = Dict{String, Any}() - - rmse = round(sqrt(integrate_on_sphere(mse_arr, lonlat)); sigdigits = 3) - global_bias = round(integrate_on_sphere(bias_arr, lonlat); sigdigits = 3) - - units = isnothing(sim_ds.new_units) ? 
sim_ds.var.attributes["units"] : sim_ds.new_units - - bias_attribs = Dict{String, Any}( - "short_name" => "sim-obs_$short_name", - "var_short_name" => "$short_name", - "long_name" => "SIM - OBS mean $short_name\n(RMSE: $rmse $units, Global bias: $global_bias $units)", - "rmse" => rmse, - "bias" => global_bias, - "units" => units, - ) - - return ClimaAnalysis.OutputVar(bias_attribs, bias_dims, bias_dim_attribs, bias_arr) -end - -""" - integrate_on_sphere(var::ClimaAnalysis.OutputVar) - -Integrate the given `var` onto the sphere with a first order integration scheme. -""" -function integrate_on_sphere(var::ClimaAnalysis.OutputVar) - lonlat = (var.dims["lon"], var.dims["lat"]) - return integrate_on_sphere(var.data, lonlat) -end - -""" - find_and_resample( - observed_data::ObsDataSource, - date::Dates.DateTime, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Find the data corresponding to the given `date` in `observed_data` and resample it to -`dest_lonlat`. -""" -function find_and_resample( - observed_data::ObsDataSource, - date::Dates.DateTime, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, -) - obs = observed_data - - available_times = obs.ncdataset[observed_data.time_name] - - if observed_data.shift_to_end_of_month - available_times = Dates.DateTime.(Dates.lastdayofmonth.(available_times)) - end - - time_index = ClimaAnalysis.Utils.nearest_index(available_times, date) - - # NOTE: We are hardcoding that the time index is the last one and that there are three - # dimensions! We should generalize this (the NetCDF file contains all the information to - # deduce this). - - data_arr = obs.preprocess_data_fn(obs.ncdataset[obs.var_name][:, :, time_index]) - - lon_arr = obs.ncdataset[obs.lon_name][:] - lat_arr = obs.ncdataset[obs.lat_name][:] - - return resample(data_arr, (lon_arr, lat_arr), dest_lonlat) -end - -""" - split_by_season(dates::AbstractArray{<: Dates.DateTime}) - -Take an array of dates and return 4 split into seasons. 
-""" -function split_by_season(dates::AbstractArray{<:Dates.DateTime}) - MAM, JJA, SON, DJF = - Vector{Dates.DateTime}(), Vector{Dates.DateTime}(), Vector{Dates.DateTime}(), Vector{Dates.DateTime}() - - for date in dates - if Dates.Month(3) <= Dates.Month(date) <= Dates.Month(5) - push!(MAM, date) - elseif Dates.Month(6) <= Dates.Month(date) <= Dates.Month(8) - push!(JJA, date) - elseif Dates.Month(9) <= Dates.Month(date) <= Dates.Month(11) - push!(SON, date) - else - push!(DJF, date) - end - end - - return (MAM, JJA, SON, DJF) -end diff --git a/test/experiment_tests/leaderboard.jl b/test/experiment_tests/leaderboard.jl deleted file mode 100644 index 03e55571fd..0000000000 --- a/test/experiment_tests/leaderboard.jl +++ /dev/null @@ -1,90 +0,0 @@ -import Test: @test, @testset, @test_throws -import ClimaComms -@static pkgversion(ClimaComms) >= v"0.6" && ClimaComms.@import_required_backends -import ClimaCoupler -import ClimaAnalysis -import ClimaUtilities.ClimaArtifacts: @clima_artifact -import Dates - -# Load file to test -include("../../experiments/ClimaEarth/user_io/leaderboard.jl") -# Data -include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl")) - -@testset "Leaderboard utils" begin - @test Leaderboard.isequispaced([1, 2, 3]) - @test !Leaderboard.isequispaced([1, 2, 4]) - - input_matrix = reshape(1.0:16, (4, 4)) - - @test Leaderboard.resample(input_matrix, (1.0:4, 1.0:4), ([2.8], [3.7]))[1] == 13.6 - - @test_throws ErrorException Leaderboard.integration_weights(([1.0], [10.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0], [1.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0, 11.0, 13.0], [10.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0, 20.0], [10.0, 11.0, 13.0])) - - @test Leaderboard.integration_weights(([10.0, 20.0], [20.0, 35.0]))[1] ≈ deg2rad(10.0) * deg2rad(15.0) * cosd(20.0) - - @test Leaderboard.integrate_on_sphere(ones(361, 181), (collect(-180.0:1:180.0), collect(-90.0:1:90.0))) ≈ 1 - - @test_throws ErrorException Leaderboard.mse([1], [2, 3], ([1], [2])) - @test_throws ErrorException Leaderboard.mse([1, 2], [2, 3, 4], ([1], [2])) - - @test_throws ErrorException Leaderboard.bias([1], [2, 3], ([1], [2])) - @test_throws ErrorException Leaderboard.bias([1, 2], [2, 3, 4], ([1], [2])) - - dates = [ - Dates.DateTime(2015, 1, 13), - Dates.DateTime(2018, 2, 13), - Dates.DateTime(1981, 7, 6), - Dates.DateTime(1993, 11, 19), - Dates.DateTime(2040, 4, 1), - Dates.DateTime(2000, 8, 18), - ] - - expected_dates = ( - [Dates.DateTime(2040, 4, 1)], - [Dates.DateTime(1981, 7, 6), Dates.DateTime(2000, 8, 18)], - [Dates.DateTime(1993, 11, 19)], - [Dates.DateTime(2015, 1, 13), Dates.DateTime(2018, 2, 13)], - ) - - @test Leaderboard.split_by_season(dates) == expected_dates -end - -@testset "Leaderboard" begin - simdir = ClimaAnalysis.SimDir(@__DIR__) - - preprocess_fn = (data) -> data .* Float32(-1 / 86400) - - # The conversion is technically not correct for this data source, but what - # we care about here is that preprocess_data_fn works - sim_datasource = Leaderboard.SimDataSource(path = @__DIR__, short_name = "pr", preprocess_data_fn = preprocess_fn) - - pr = get(simdir, "pr") - - @test sim_datasource.lonlat[1] == pr.dims["lon"] - @test sim_datasource.lonlat[2] == pr.dims["lat"] - - @test Leaderboard.data_at_date(sim_datasource, Dates.DateTime(1979, 1, 2)) == preprocess_fn(pr.data[1, :, :]) - - obs_datasource = Leaderboard.ObsDataSource(; - path = joinpath(@clima_artifact("precipitation_obs"), 
"gpcp.precip.mon.mean.197901-202305.nc"), - var_name = "precip", - preprocess_data_fn = preprocess_fn, - ) - - lat = obs_datasource.ncdataset["lat"][20] - lon = obs_datasource.ncdataset["lon"][30] - - @test Leaderboard.find_and_resample(obs_datasource, Dates.DateTime(1979, 1, 5), ([lon], [lat]))[1] == - preprocess_fn(obs_datasource.ncdataset["precip"][30, 20, 1]) - - # A very weak test, to make sure we can call the function. - # We assume the key functions bias and rmse are tested elsewhere. - computed_bias = - Leaderboard.bias(obs_datasource, sim_datasource, [Dates.DateTime(1979, 1, 1), Dates.DateTime(1979, 1, 2)]) - @test haskey(computed_bias.attributes, "rmse") - @test haskey(computed_bias.attributes, "bias") -end diff --git a/test/runtests.jl b/test/runtests.jl index 7aab184757..a462ae293f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,10 +50,7 @@ end gpu_broken || @safetestset "experiment test: CoupledSims tests" begin include("experiment_tests/coupled_sims.jl") end -# This test fails because of the undownloadable artifact -# @safetestset "experiment test: Leaderboard" begin -# include("experiment_tests/leaderboard.jl") -# end + @safetestset "component model test: ClimaAtmos" begin include("component_model_tests/climaatmos_tests.jl") end