Merge branch 'main' into optimise-mvnormal-scan

CDCgov · Jul 16, 2024 · 73ceaa5 · 73ceaa5
2 parents 4b7929e + 717f586
commit 73ceaa5
Show file tree

Hide file tree

Showing 15 changed files with 490 additions and 17 deletions.
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -26,7 +26,7 @@ jobs:
           Pkg.develop(path = "./EpiAware");
           Pkg.develop(path = "./pipeline")'
       - name: Run benchmarks
-        run: julia --project=benchmark -e 'using BenchmarkCI; BenchmarkCI.judge(; baseline = "origin/main")'
+        run: julia --project=benchmark -e 'using BenchmarkCI; BenchmarkCI.judge(; baseline = "origin/main", retune = true)'
       - name: Post results
         run: julia --project=benchmark -e 'using BenchmarkCI; BenchmarkCI.postjudge()'
         env:

diff --git a/pipeline/Project.toml b/pipeline/Project.toml
@@ -5,7 +5,9 @@ authors = ["Sam Abbott", "Sam Brand", "Zach Susswein"]
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
+AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
 DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

diff --git a/pipeline/scripts/create_figure1.jl b/pipeline/scripts/create_figure1.jl
@@ -0,0 +1,35 @@
+## Script to make figure 1
+using Pkg
+Pkg.activate(joinpath(@__DIR__(), ".."))
+
+using EpiAwarePipeline, EpiAware, AlgebraOfGraphics, JLD2, DrWatson, Plots, DataFramesMeta,
+      Statistics, Distributions, CSV
+
+## Scenario pipelines, used to recover the scenario name from each filename
+pipelines = [
+    SmoothOutbreakPipeline(), MeasuresOutbreakPipeline(),
+    SmoothEndemicPipeline(), RoughEndemicPipeline()]
+
+## Load the prediction summaries and stack the truth data files into one dataframe
+truth_data_dir = datadir("truth_data")
+truth_data_files = filter(fname -> occursin("jld2", fname), readdir(truth_data_dir))
+analysis_df = DataFrame(CSV.File(plotsdir("analysis_df.csv")))
+truth_df = mapreduce(vcat, truth_data_files) do fname
+    truth_data = load(joinpath(truth_data_dir, fname))
+    make_truthdata_dataframe(fname, truth_data, pipelines)
+end
+
+## Make mainfigure plots
+
+# Panel title and reference time `T` for each scenario shown in figure 1
+scenario_dict = Dict(
+    "measures_outbreak" => (; title = "Outbreak with measures", T = 28),
+    "smooth_outbreak" => (; title = "Outbreak no measures", T = 35),
+    "smooth_endemic" => (; title = "Smooth endemic", T = 35),
+    "rough_endemic" => (; title = "Rough endemic", T = 35)
+)
+
+fig1 = figureone(truth_df, analysis_df, scenario_dict)
+
+## Write the figure to the plots directory
+save(plotsdir("figure1.png"), fig1)
diff --git a/pipeline/scripts/create_prediction_dataframe.jl b/pipeline/scripts/create_prediction_dataframe.jl
@@ -0,0 +1,33 @@
+using Pkg
+Pkg.activate(joinpath(@__DIR__(), ".."))
+
+using EpiAwarePipeline, EpiAware, AlgebraOfGraphics, JLD2, DrWatson, Plots, DataFramesMeta,
+      Statistics, Distributions, CSV
+
+## load some data and create a dataframe for the plot
+files = readdir(datadir("epiaware_observables")) |>
+        strs -> filter(s -> occursin("jld2", s), strs)
+
+## Define scenarios
+pipelines = [
+    SmoothOutbreakPipeline(), MeasuresOutbreakPipeline(),
+    SmoothEndemicPipeline(), RoughEndemicPipeline()]
+
+## Set up EpiData objects: used in the prediction dataframe for infection generating
+## processes that don't use the generation interval directly in simulation.
+gi_params = make_gi_params(pipelines[1])
+epi_datas = map(gi_params["gi_means"]) do μ
+    σ = gi_params["gi_stds"][1]
+    shape = (μ / σ)^2
+    scale = σ^2 / μ
+    Gamma(shape, scale)
+end .|> gen_dist -> EpiData(gen_distribution = gen_dist)
+
+## Calculate the prediction dataframe, dropping files that failed (returned `nothing`)
+prediction_dfs = map(files) do filename
+    output = load(joinpath(datadir("epiaware_observables"), filename))
+    make_prediction_dataframe_from_output(filename, output, epi_datas, pipelines)
+end
+prediction_df = reduce(vcat, filter(!isnothing, prediction_dfs))
+
+## Save the prediction dataframe
+CSV.write(plotsdir("analysis_df.csv"), prediction_df)
diff --git a/pipeline/src/EpiAwarePipeline.jl b/pipeline/src/EpiAwarePipeline.jl
@@ -12,15 +12,17 @@ module EpiAwarePipeline
 
 using CSV, Dagger, DataFramesMeta, Dates, Distributions, DocStringExtensions, DrWatson,
       EpiAware, Plots, Statistics, ADTypes, AbstractMCMC, Plots, JLD2, MCMCChains, Turing,
-      DynamicPPL, LogExpFunctions, RCall, LinearAlgebra, Random
+      DynamicPPL, LogExpFunctions, RCall, LinearAlgebra, Random, AlgebraOfGraphics,
+      CairoMakie
 
 # Exported pipeline types
 export AbstractEpiAwarePipeline, EpiAwarePipeline, AbstractRtwithoutRenewalPipeline,
        RtwithoutRenewalPriorPipeline, EpiAwareExamplePipeline, SmoothOutbreakPipeline,
        MeasuresOutbreakPipeline, SmoothEndemicPipeline, RoughEndemicPipeline
 
 # Exported utility functions
-export calculate_processes
+export calculate_processes, generate_quantiles_for_targets,
+       timeseries_samples_into_quantiles
 
 # Exported configuration types
 export TruthSimulationConfig, InferenceConfig
@@ -46,6 +48,12 @@ export define_forecast_epiprob, generate_forecasts
 # Exported functions: scoring functions
 export score_parameters
 
+# Exported functions: Analysis functions for constructing dataframes
+export make_prediction_dataframe_from_output, make_truthdata_dataframe
+
+# Exported functions: Make main plots
+export figureone
+
 # Exported functions: plot functions
 export plot_truth_data, plot_Rt
 
@@ -57,5 +65,7 @@ include("simulate/simulate.jl")
 include("infer/infer.jl")
 include("forecast/forecast.jl")
 include("scoring/score_parameters.jl")
+include("analysis/analysis.jl")
+include("mainplots/mainplots.jl")
 include("plot_functions.jl")
 end
diff --git a/pipeline/src/analysis/analysis.jl b/pipeline/src/analysis/analysis.jl
@@ -0,0 +1,2 @@
+include("make_truthdata_dataframe.jl")
+include("make_prediction_dataframe_from_output.jl")
diff --git a/pipeline/src/analysis/make_prediction_dataframe_from_output.jl b/pipeline/src/analysis/make_prediction_dataframe_from_output.jl
@@ -0,0 +1,64 @@
+"""
+    make_prediction_dataframe_from_output(filename, output, epi_datas, pipelines; qs)
+
+Create a dataframe containing prediction results based on the given output and input data.
+
+# Arguments
+- `filename`: The name of the file; scenario, latent model and GI means are read from it.
+- `output`: The output data containing inference configuration, IGP model, and other information.
+- `epi_datas`: The input data for the epidemiological model, used for non-Renewal IGP models.
+- `pipelines`: The pipelines used to identify the scenario from `filename`.
+- `qs`: An optional array of quantiles to calculate. Default is `[0.025, 0.5, 0.975]`.
+
+# Returns
+A dataframe containing the prediction results, or `nothing` (with a warning) on any error.
+"""
+function make_prediction_dataframe_from_output(
+        filename, output, epi_datas, pipelines; qs = [0.025, 0.5, 0.975])
+    #Get the scenario, IGP model, latent model and true mean GI
+    inference_config = output["inference_config"]
+    igp_model = inference_config.igp |> string
+    scenario = EpiAwarePipeline._get_scenario_from_filename(filename, pipelines)
+    latent_model = EpiAwarePipeline._get_latent_model_from_filename(filename)
+    true_mean_gi = EpiAwarePipeline._get_true_gi_mean_from_filename(filename)
+
+    #Get the quantiles for the targets across the gi mean scenarios: a Renewal model uses
+    #its own underlying epi data, otherwise loop over `epi_datas` (gi mean implications)
+    used_epi_datas = igp_model == "Renewal" ? [output["epiprob"].epi_model.data] : epi_datas
+
+    try
+        preds = map(used_epi_datas) do epi_data
+            generate_quantiles_for_targets(output, epi_data, qs)
+        end
+        used_gi_means = igp_model == "Renewal" ?
+                        [EpiAwarePipeline._get_used_gi_mean_from_filename(filename)] :
+                        make_gi_params(EpiAwareExamplePipeline())["gi_means"]
+
+        #Create the dataframe columnwise
+        return mapreduce(vcat, preds, used_gi_means) do pred, used_gi_mean
+            mapreduce(vcat, keys(pred)) do target
+                target_mat = pred[target]
+                target_times = collect(1:size(target_mat, 1)) .+
+                               (inference_config.tspan[1] - 1)
+                _df = DataFrame(target_times = target_times)
+                _df[!, "Scenario"] .= scenario
+                _df[!, "IGP_Model"] .= igp_model
+                _df[!, "Latent_Model"] .= latent_model
+                _df[!, "True_GI_Mean"] .= true_mean_gi
+                _df[!, "Used_GI_Mean"] .= used_gi_mean
+                _df[!, "Reference_Time"] .= inference_config.tspan[2]
+                _df[!, "Target"] .= string(target)
+                # quantile predictions
+                for (j, q) in enumerate(qs)
+                    q_str = split(string(q), ".")[end]
+                    _df[!, "q_$(q_str)"] = target_mat[:, j]
+                end
+                return _df
+            end
+        end
+    catch err
+        # Keep the best-effort behaviour, but log the cause instead of discarding it
+        @warn "Error in generating quantiles for targets in file $filename" exception=err
+        return nothing
+    end
+end
diff --git a/pipeline/src/analysis/make_truthdata_dataframe.jl b/pipeline/src/analysis/make_truthdata_dataframe.jl
@@ -0,0 +1,38 @@
+
+"""
+    make_truthdata_dataframe(filename, truth_data, pipelines; I_0 = 100.0)
+
+Build a long-format `DataFrame` of the truth processes, one block of rows per target.
+
+# Arguments
+- `filename`: Name of the truth data file, used to look up the scenario.
+- `truth_data`: Container indexed by the keys `"I_t"`, `"truth_gi_mean"` and
+  `"truth_process"`.
+- `pipelines`: Pipelines passed to `_get_scenario_from_filename` with `filename`.
+- `I_0`: Initial value for I_t, passed to `_calc_rt` (default: `100.0`).
+
+# Returns
+- A `DataFrame` with columns `target_times`, `target_values`, `Scenario`,
+  `True_GI_Mean` and `Target`; `Target` takes the values `log_I_t`, `rt` and `Rt`.
+"""
+function make_truthdata_dataframe(filename, truth_data, pipelines; I_0 = 100.0)
+    infections = truth_data["I_t"]
+    gi_mean = truth_data["truth_gi_mean"]
+    log_infections = _calc_log_infections(infections)
+    rt_series = _calc_rt(infections, I_0)
+    scenario_name = _get_scenario_from_filename(filename, pipelines)
+    processes = (;
+        log_I_t = log_infections, rt = rt_series, Rt = truth_data["truth_process"])
+
+    # One frame per target; targets are stacked in the order they are listed above.
+    frames = map(keys(processes)) do name
+        series = processes[name]
+        frame = DataFrame(target_times = 1:length(series), target_values = series)
+        frame[!, "Scenario"] .= scenario_name
+        frame[!, "True_GI_Mean"] .= gi_mean
+        frame[!, "Target"] .= string(name)
+        return frame
+    end
+
+    return reduce(vcat, frames)
+end
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		include("make_truthdata_dataframe.jl")
		include("make_prediction_dataframe_from_output.jl")