Skip to content

Commit 0dfbdd2

Browse files
authored
Issue 537: Fix making analysis dataframes and record priors (#535)
* Update changelog.md * Update make_model_priors.jl * Make `define_epiprob` more modular _make_epidata can also be reused elsewhere * restructure analysis dataframe func * fix make_prediction_dataframe_from_output And add simple unit test with committed test data * Revert "fix make_prediction_dataframe_from_output" This reverts commit 7e6238b. * Reapply "fix make_prediction_dataframe_from_output" This reverts commit 14d29b4.
1 parent 5274760 commit 0dfbdd2

File tree

9 files changed

+141
-52
lines changed

9 files changed

+141
-52
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,6 @@ EpiAware/docs/src/getting-started/tutorials/censored-obs.md
393393
EpiAware/docs/Manifest*.toml
394394

395395
!benchmark/Manifest.toml
396+
397+
# Test data
398+
!pipeline/test/analysis/test_data.jld2

manuscript/changelog.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,37 @@
22

33
## 16th Aug 2024
44
Changed the proposed latent processes to daily rather than weekly incremental changes.
5+
6+
## 5th Dec 2024
7+
Changed priors to
8+
9+
```julia
10+
"""
11+
Constructs and returns a dictionary of prior distributions for the latent model
12+
parameters. This is the default method.
13+
14+
# Arguments
15+
- `pipeline`: An instance of the `AbstractEpiAwarePipeline` type.
16+
17+
# Returns
18+
A dictionary containing the following prior distributions:
19+
- `"transformed_process_init_prior"`: A normal distribution with mean 0.0 and
20+
standard deviation 0.25.
21+
- `"std_prior"`: A half-normal distribution with standard deviation 0.25.
22+
- `"damp_param_prior"`: A beta distribution with shape parameters 0.5 and 0.5.
23+
24+
"""
25+
function make_model_priors(pipeline::AbstractEpiAwarePipeline)
26+
transformed_process_init_prior = Normal(0.0, 0.25)
27+
std_prior = HalfNormal(0.025)
28+
damp_param_prior = Beta(1, 9)
29+
log_I0_prior = Normal(log(100.0), 1e-1)
30+
31+
return Dict(
32+
"transformed_process_init_prior" => transformed_process_init_prior,
33+
"std_prior" => std_prior,
34+
"damp_param_prior" => damp_param_prior,
35+
"log_I0_prior" => log_I0_prior
36+
)
37+
end
38+
```

pipeline/src/analysis/make_prediction_dataframe_from_output.jl

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ Create a dataframe containing prediction results based on the given output and i
33
44
# Arguments
55
- `filename`: The name of the file.
6-
- `output`: The output data containing inference configuration, IGP model, and other information.
6+
- `output`: The output data containing inference configuration, IGP model, and other
7+
using Base: infer_effects
8+
information.
79
- `epi_datas`: The input data for the epidemiological model.
810
- `qs`: An optional array of quantiles to calculate. Default is `[0.025, 0.5, 0.975]`.
911
@@ -12,53 +14,57 @@ A dataframe containing the prediction results.
1214
1315
"""
1416
function make_prediction_dataframe_from_output(
15-
filename, output, epi_datas, pipelines; qs = [0.025, 0.5, 0.975])
16-
#Get the scenario, IGP model, latent model and true mean GI
17+
output, true_mean_gi; qs = [0.025, 0.25, 0.5, 0.75, 0.975],
18+
transformation = oneexpy)
19+
#Unpack the output
1720
inference_config = output["inference_config"]
18-
igp_model = output["inference_config"].igp |> string
19-
scenario = EpiAwarePipeline._get_scenario_from_filename(filename, pipelines)
20-
latent_model = EpiAwarePipeline._get_latent_model_from_filename(filename)
21-
true_mean_gi = EpiAwarePipeline._get_true_gi_mean_from_filename(filename)
21+
forecasts = output["forecast_results"]
22+
#Get the scenario, IGP model, latent model and true mean GI
23+
igp_model = inference_config["igp"] |> igp_name -> split(igp_name, ".")[end]
24+
scenario = inference_config["scenario"]
25+
latent_model = inference_config["latent_model"]
26+
used_gi_mean = inference_config["gi_mean"]
27+
used_gi_std = inference_config["gi_std"]
28+
(start_time, reference_time) = inference_config["tspan"] |>
29+
tspan -> split(tspan, "_") |>
30+
tspan -> (
31+
parse(Int, tspan[1]), parse(Int, tspan[2]))
2232

2333
#Get the quantiles for the targets across the gi mean scenarios
2434
#if Renewal model, then we use the underlying epi model
2535
#otherwise we use the epi datas to loop over different gi mean implications
26-
used_epi_datas = igp_model == "Renewal" ? [output["epiprob"].epi_model.data] : epi_datas
36+
used_gi_means = igp_model == "Renewal" ?
37+
[used_gi_mean] :
38+
make_gi_params(EpiAwareExamplePipeline())["gi_means"]
2739

28-
preds = nothing
29-
try
30-
preds = map(used_epi_datas) do epi_data
31-
generate_quantiles_for_targets(output, epi_data, qs)
32-
end
33-
used_gi_means = igp_model == "Renewal" ?
34-
[EpiAwarePipeline._get_used_gi_mean_from_filename(filename)] :
35-
make_gi_params(EpiAwareExamplePipeline())["gi_means"]
40+
used_epidatas = map(used_gi_means) do
41+
_make_epidata(ḡ, used_gi_std; transformation = transformation)
42+
end
43+
44+
preds = map(used_epidatas) do epi_data
45+
generate_quantiles_for_targets(forecasts, epi_data, qs)
46+
end
3647

37-
#Create the dataframe columnwise
38-
df = mapreduce(vcat, preds, used_gi_means) do pred, used_gi_mean
39-
mapreduce(vcat, keys(pred)) do target
40-
target_mat = pred[target]
41-
target_times = collect(1:size(target_mat, 1)) .+
42-
(inference_config.tspan[1] - 1)
43-
_df = DataFrame(target_times = target_times)
44-
_df[!, "Scenario"] .= scenario
45-
_df[!, "IGP_Model"] .= igp_model
46-
_df[!, "Latent_Model"] .= latent_model
47-
_df[!, "True_GI_Mean"] .= true_mean_gi
48-
_df[!, "Used_GI_Mean"] .= used_gi_mean
49-
_df[!, "Reference_Time"] .= inference_config.tspan[2]
50-
_df[!, "Target"] .= string(target)
51-
# quantile predictions
52-
for (j, q) in enumerate(qs)
53-
q_str = split(string(q), ".")[end]
54-
_df[!, "q_$(q_str)"] = target_mat[:, j]
55-
end
56-
return _df
48+
#Create the dataframe columnwise
49+
df = mapreduce(vcat, preds, used_gi_means) do pred, used_gi_mean
50+
mapreduce(vcat, keys(pred)) do target
51+
target_mat = pred[target]
52+
target_times = collect(1:size(target_mat, 1)) .+ (start_time - 1)
53+
_df = DataFrame(target_times = target_times)
54+
_df[!, "Scenario"] .= scenario
55+
_df[!, "IGP_Model"] .= igp_model
56+
_df[!, "Latent_Model"] .= latent_model
57+
_df[!, "True_GI_Mean"] .= true_mean_gi
58+
_df[!, "Used_GI_Mean"] .= used_gi_mean
59+
_df[!, "Reference_Time"] .= reference_time
60+
_df[!, "Target"] .= string(target)
61+
# quantile predictions
62+
for (j, q) in enumerate(qs)
63+
q_str = split(string(q), ".")[end]
64+
_df[!, "q_$(q_str)"] = target_mat[:, j]
5765
end
66+
return _df
5867
end
59-
return df
60-
catch
61-
@warn "Error in generating quantiles for targets in file $filename"
62-
return nothing
6368
end
69+
return df
6470
end

pipeline/src/constructors/make_model_priors.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,18 @@ parameters. This is the default method.
88
# Returns
99
A dictionary containing the following prior distributions:
1010
- `"transformed_process_init_prior"`: A normal distribution with mean 0.0 and
11-
standard deviation 0.25.
12-
- `"std_prior"`: A half-normal distribution with standard deviation 0.25.
13-
- `"damp_param_prior"`: A beta distribution with shape parameters 0.5 and 0.5.
11+
standard deviation 0.025.
12+
- `"std_prior"`: A half-normal distribution with standard deviation 0.025.
13+
- `"damp_param_prior"`: A beta distribution with shape parameters 1 and 9.
14+
- `"log_I0_prior"`: A normal distribution with mean log(100.0) and standard
15+
deviation 1e-1.
1416
1517
"""
1618
function make_model_priors(pipeline::AbstractEpiAwarePipeline)
1719
transformed_process_init_prior = Normal(0.0, 0.25)
18-
std_prior = HalfNormal(0.25)
19-
damp_param_prior = Beta(0.5, 0.5)
20-
log_I0_prior = Normal(log(100.0), 1e-5)
20+
std_prior = HalfNormal(0.025)
21+
damp_param_prior = Beta(1, 9)
22+
log_I0_prior = Normal(log(100.0), 1e-1)
2123

2224
return Dict(
2325
"transformed_process_init_prior" => transformed_process_init_prior,

pipeline/src/infer/define_epiprob.jl

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,28 @@
1+
"""
2+
Create an `EpiData` object based on the provided generation interval mean and standard
3+
deviation by assuming a gamma distribution.
4+
5+
# Arguments
6+
- `gi_mean::Float64`: Mean of the generation interval.
7+
- `gi_std::Float64`: Standard deviation of the generation interval.
8+
- `transformation`: A transformation function to be applied
9+
(default is `EpiAwarePipeline.oneexpy` a custom implementation of `exp`).
10+
11+
# Returns
12+
- `model_data::EpiData`: An `EpiData` object containing the generation interval distribution
13+
and transformation.
14+
15+
"""
16+
function _make_epidata(gi_mean, gi_std; transformation = EpiAwarePipeline.oneexpy)
17+
shape = (gi_mean / gi_std)^2
18+
scale = gi_std^2 / gi_mean
19+
gen_distribution = Gamma(shape, scale)
20+
21+
model_data = EpiData(
22+
gen_distribution = gen_distribution, transformation = transformation)
23+
return model_data
24+
end
25+
126
"""
227
Create an `EpiProblem` object based on the provided `InferenceConfig`.
328
@@ -9,12 +34,8 @@ Create an `EpiProblem` object based on the provided `InferenceConfig`.
934
1035
"""
1136
function define_epiprob(config::InferenceConfig)
12-
shape = (config.gi_mean / config.gi_std)^2
13-
scale = config.gi_std^2 / config.gi_mean
14-
gen_distribution = Gamma(shape, scale)
15-
16-
model_data = EpiData(
17-
gen_distribution = gen_distribution, transformation = config.transformation)
37+
model_data = _make_epidata(
38+
config.gi_mean, config.gi_std; transformation = config.transformation)
1839
#Build the epidemiological model
1940
epi = config.igp(data = model_data, initialisation_prior = config.log_I0_prior)
2041

pipeline/test/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
33
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
44
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
5+
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
56
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
67
DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
78
EpiAware = "b2eeebe4-5992-4301-9193-7ebc9f62c855"
9+
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
810
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
911
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
1012
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
@testset "test dataframe construct for one dataset" begin
2+
using JLD2, DataFramesMeta
3+
output = load(joinpath(@__DIR__(), "test_data.jld2"))
4+
true_mean_gi = 10.0
5+
6+
df = make_prediction_dataframe_from_output(output, true_mean_gi)
7+
@test !isempty(df)
8+
@test "Scenario" in names(df)
9+
@test "IGP_Model" in names(df)
10+
@test "Latent_Model" in names(df)
11+
@test "True_GI_Mean" in names(df)
12+
@test "Used_GI_Mean" in names(df)
13+
@test "Reference_Time" in names(df)
14+
@test "Target" in names(df)
15+
@test "q_025" in names(df)
16+
@test "q_25" in names(df)
17+
@test "q_5" in names(df)
18+
@test "q_75" in names(df)
19+
@test "q_975" in names(df)
20+
end
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include("make_prediction_dataframe_from_output.jl")

pipeline/test/analysis/test_data.jld2

453 KB
Binary file not shown.

0 commit comments

Comments
 (0)