Skip to content

Commit

Permalink
report: new module for briefing reports
Browse files Browse the repository at this point in the history
  • Loading branch information
abhidg committed Sep 8, 2024
1 parent f208131 commit 0922ab7
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 102 deletions.
4 changes: 2 additions & 2 deletions src/olm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import webbrowser
import urllib
from pathlib import Path
from .util import build
from .report import make_report
from .outbreaks import OUTBREAKS

USAGE = """olm: Office for Linelist Management
Expand Down Expand Up @@ -70,7 +70,7 @@ def main():
case "report":
if args.outbreak not in OUTBREAKS:
abort(f"Outbreak not supported: {args.outbreak}")
build(
make_report(
args.outbreak,
args.data or OUTBREAKS[args.outbreak]["url"],
OUTBREAKS[args.outbreak]["plots"],
Expand Down
17 changes: 8 additions & 9 deletions src/olm/outbreaks/marburg.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Marburg 2023 Equatorial Guinea Outbreak - {{ published_date }}</title>
<meta name="description" content="Marburg 2023 outbreak briefing report from Global.health">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/light.css">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://www.monkeypox.global.health/style.css">
<link rel="shortcut icon" type="image/x-icon"
Expand All @@ -16,15 +15,15 @@
</head>

<body>
<nav>
<ul>
<li><a href="https://l66noa47nk.execute-api.eu-central-1.amazonaws.com/web">Data</a></li>
<li><a href="https://github.com/globaldothealth/marburg">GitHub</a></li>
</ul>
<img class="logo" src="https://global.health/wp-content/uploads/2020/07/gh-logo-full-black.png"
alt="Global.health logo">
</nav>
<main>
<nav>
<img class="logo" src="https://global.health/wp-content/uploads/2020/07/gh-logo-full-black.png"
alt="Global.health logo">
<ul>
<li><a href="https://l66noa47nk.execute-api.eu-central-1.amazonaws.com/web">Data</a></li>
<li><a href="https://github.com/globaldothealth/marburg">GitHub</a></li>
</ul>
</nav>
<h1>Marburg 2023 Equatorial Guinea Outbreak<br>{{ published_date }}</h1>
<p class="byline">From the <a href="https://global.health">Global.health</a> team (<a
href="mailto:info@global.health">info@global.health</a>)
Expand Down
16 changes: 8 additions & 8 deletions src/olm/outbreaks/mpox-2024.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
</head>

<body>
<nav>
<ul>
<li><a href="https://mpox-2024.s3.eu-central-1.amazonaws.com/latest.csv">Data</a></li>
<li><a href="https://github.com/globaldothealth/outbreak-data/wiki/GHL2024.D11.1E71">GitHub</a></li>
</ul>
<img class="logo" src="https://global.health/wp-content/uploads/2020/07/gh-logo-full-black.png"
alt="Global.health logo">
</nav>
<main>
<nav>
<img class="logo" src="https://global.health/wp-content/uploads/2020/07/gh-logo-full-black.png"
alt="Global.health logo">
<ul>
<li><a href="https://mpox-2024.s3.eu-central-1.amazonaws.com/latest.csv">Data</a></li>
<li><a href="https://github.com/globaldothealth/outbreak-data/wiki/GHL2024.D11.1E71">GitHub</a></li>
</ul>
</nav>
<h1>Mpox 2024 Outbreak</h1>
<p class="byline">From the <a href="https://global.health">Global.health</a> team (<a
href="mailto:info@global.health">info@global.health</a>). Published on {{ published_date}}
Expand Down
92 changes: 92 additions & 0 deletions src/olm/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
Briefing report generator module
"""

import datetime
from typing import Callable, Any
from pathlib import Path

import chevron
import plotly.io
import plotly.graph_objects as go

from .util import read_csv, store_s3, invalidate_cache

# A plot function is called by make_report() as ``function(df, **kwargs)`` and
# returns either a dict of template variables or a plotly figure.
PlotFunction = Callable[..., dict[str, Any] | go.Figure]
# One report entry: (key, plot function, keyword arguments). The part of the key
# before the first "/" ("data", "table" or "figure") selects how make_report()
# turns the function's result into template variables.
# NOTE(review): make_report() also accepts 2-element entries without the
# keyword-argument dict, which this alias does not express.
PlotData = tuple[str, PlotFunction, dict[str, Any]]


def render(template: Path, variables: dict[str, Any]) -> str:
    """Render a template file with chevron

    Parameters
    ----------
    template
        Path of the template file, opened in default text mode
    variables
        Variables handed to ``chevron.render`` alongside the open file

    Returns
    -------
    str
        The rendered template
    """
    with template.open() as template_file:
        rendered = chevron.render(template_file, variables)
    return rendered


def render_figure(fig, key: str) -> dict[str, str]:
    """Convert a figure to an HTML snippet stored under a template key

    Parameters
    ----------
    fig
        Figure to convert, passed as-is to ``plotly.io.to_html``
    key
        Name of the template variable that will hold the HTML

    Returns
    -------
    dict[str, str]
        Single-entry mapping ``{key: html}``, ready to be merged into the
        template variables with ``dict.update``
    """
    # include_plotlyjs=False and full_html=False are forwarded to plotly so
    # that only the figure's own markup is produced; presumably the report
    # template loads plotly.js itself -- confirm against the templates.
    html = plotly.io.to_html(fig, include_plotlyjs=False, full_html=False)
    return {key: html}


def make_report(
outbreak_name: str,
data_url: str,
plots: list[PlotData],
date_columns: list[str] = [],
output_bucket: str | None = None,
cloudfront_distribution: str | None = None,
):
"""Build epidemiological report
Parameters
----------
outbreak_name
Name of the outbreak
data_url
Data file for the outbreak, can be a S3 URL
plots
List of plot or table specifications for the outbreak, such as those
in :module:`olm.outbreaks`
date_columns
If specified, lists additional date columns to be passed to read_csv()
output_bucket
Output S3 bucket to write result to, in addition to local HTML output
to {outbreak_name}.html
cloudfront_distribution
If specified, invalidates the cache for the cloudfront distribution
without which changes are not made available
"""
assert " " not in outbreak_name, "Outbreak name should not have spaces"
date = datetime.datetime.today().date()
output_file = f"{outbreak_name}.html"
if not (template := Path(__file__).parent / "outbreaks" / output_file).exists():
raise FileNotFoundError(f"Template for outbreak not found at: {template}")
var = {"published_date": str(date)}
df = read_csv(data_url, date_columns)
for plot in plots:
kwargs = {} if len(plot) == 2 else plot[2]
plot_type = plot[0].split("/")[0]
match plot_type:
case "data":
var.update(plot[1](df, **kwargs))
case "table":
var[plot[0].removeprefix("table/")] = plot[1](df, **kwargs).to_html(
index=False
)
case "figure":
var.update(
render_figure(
plot[1](df, **kwargs), plot[0].removeprefix("figure/")
)
)

report_data = render(template, var)
Path(output_file).write_text(report_data)
print("wrote", output_file)

if output_bucket:
store_s3(
report_data,
[f"{outbreak_name}/index.html", f"{outbreak_name}/{date}.html"],
bucket_name=output_bucket,
content_type="text/html",
)
if cloudfront_distribution:
invalidate_cache(cloudfront_distribution)
85 changes: 2 additions & 83 deletions src/olm/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,12 @@
import re
import logging
import datetime
from pathlib import Path
from typing import Any, Callable

import boto3
import chevron

import pandas as pd
import plotly.graph_objects as go
import plotly.io

# A plot function is called by build() as ``function(df, **kwargs)`` and
# returns either a dict of template variables or a plotly figure.
PlotFunction = Callable[..., dict[str, Any] | go.Figure]
# One report entry: (key, plot function, keyword arguments). The part of the key
# before the first "/" ("data", "table" or "figure") selects how build() turns
# the function's result into template variables.
PlotData = tuple[str, PlotFunction, dict[str, Any]]

pd.options.mode.chained_assignment = None

AGE_BINS = [
Expand Down Expand Up @@ -92,15 +87,6 @@ def name_bin(bin_idx: int) -> str:
return f"{bin[0]}-{bin[1]}"


def render(template: Path, variables: dict[str, Any]) -> str:
    """Render a template file with chevron

    Parameters
    ----------
    template
        Path of the template file, opened in default text mode
    variables
        Variables handed to ``chevron.render`` alongside the open file

    Returns
    -------
    str
        The rendered template
    """
    with template.open() as template_file:
        rendered = chevron.render(template_file, variables)
    return rendered


def render_figure(fig, key: str) -> dict[str, str]:
    """Convert a figure to an HTML snippet stored under a template key

    Parameters
    ----------
    fig
        Figure to convert, passed as-is to ``plotly.io.to_html``
    key
        Name of the template variable that will hold the HTML

    Returns
    -------
    dict[str, str]
        Single-entry mapping ``{key: html}``, ready to be merged into the
        template variables with ``dict.update``
    """
    # include_plotlyjs=False and full_html=False are forwarded to plotly so
    # that only the figure's own markup is produced; presumably the report
    # template loads plotly.js itself -- confirm against the templates.
    html = plotly.io.to_html(fig, include_plotlyjs=False, full_html=False)
    return {key: html}


def percentage_occurrence(df: pd.DataFrame, filter_series: pd.Series) -> int:
    """Return the share of ``df`` rows selected by ``filter_series``

    The share is expressed as a percentage of ``len(df)`` and rounded to a
    whole number.
    """
    matching_rows = sum(filter_series)
    total_rows = len(df)
    percentage = 100 * matching_rows / total_rows
    return int(round(percentage))
Expand Down Expand Up @@ -159,70 +145,3 @@ def read_csv(filename: str, additional_date_columns: list[str] = []) -> pd.DataF
df = pd.read_csv(filename, dtype=str, na_values=["N/K", "NK"])
fix_datetimes(df, additional_date_columns)
return df


def build(
outbreak_name: str,
data_url: str,
plots: list[PlotData],
date_columns: list[str] = [],
output_bucket: str | None = None,
cloudfront_distribution: str | None = None,
):
"""Build epidemiological report
Parameters
----------
outbreak_name
Name of the outbreak
data_url
Data file for the outbreak, can be a S3 URL
plots
List of plot or table specifications for the outbreak, such as those
in :module:`olm.outbreaks`
date_columns
If specified, lists additional date columns to be passed to read_csv()
output_bucket
Output S3 bucket to write result to, in addition to local HTML output
to {outbreak_name}.html
cloudfront_distribution
If specified, invalidates the cache for the cloudfront distribution
without which changes are not made available
"""
assert " " not in outbreak_name, "Outbreak name should not have spaces"
date = datetime.datetime.today().date()
output_file = f"{outbreak_name}.html"
if not (template := Path(__file__).parent / "outbreaks" / output_file).exists():
raise FileNotFoundError(f"Template for outbreak not found at: {template}")
var = {"published_date": str(date)}
df = read_csv(data_url, date_columns)
for plot in plots:
kwargs = {} if len(plot) == 2 else plot[2]
plot_type = plot[0].split("/")[0]
match plot_type:
case "data":
var.update(plot[1](df, **kwargs))
case "table":
var[plot[0].removeprefix("table/")] = plot[1](df, **kwargs).to_html(
index=False
)
case "figure":
var.update(
render_figure(
plot[1](df, **kwargs), plot[0].removeprefix("figure/")
)
)

report_data = render(template, var)
Path(output_file).write_text(report_data)
print("wrote", output_file)

if output_bucket:
store_s3(
report_data,
[f"{outbreak_name}/index.html", f"{outbreak_name}/{date}.html"],
bucket_name=output_bucket,
content_type="text/html",
)
if cloudfront_distribution:
invalidate_cache(cloudfront_distribution)

0 comments on commit 0922ab7

Please sign in to comment.