
[ENH] Use pregenerated figures #96


Merged 14 commits on Oct 10, 2024
18 changes: 17 additions & 1 deletion .github/workflows/update_submodule.yml
Collaborator

Ah, I just realized that we should also install the Python dependencies to ensure that your nice script doesn't error out during the workflow 🤦‍♀️

I can't suggest in this part of the file, but I think

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt

after the checkout step should do the trick!

Contributor Author

Ah yes, good point

@@ -16,11 +16,27 @@ jobs:
submodules: recursive
token: ${{ secrets.CLIMATE_DB_DATA_PAT }}

- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt

- name: Update submodule
run: |
cd data
git switch main && git pull
git switch main
output=$(git pull)
echo "$output"
cd ..
if [[ "$output" != *"Already up to date."* ]]; then
./code/create_prerendered_figures.py
fi


- name: Create pull request
uses: peter-evans/create-pull-request@v7
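For illustration only (not part of the PR): the new workflow step regenerates the figures only when git pull reports fresh submodule commits. A rough Python sketch of the same check, mirroring the shell logic above and its reliance on git's "Already up to date." message:

    import subprocess
    from pathlib import Path


    def pull_submodule_and_rerender(repo_root: Path) -> bool:
        """Pull the data submodule; rerender figures only if new commits arrived."""
        data_dir = repo_root / "data"
        subprocess.run(["git", "switch", "main"], cwd=data_dir, check=True)
        pull = subprocess.run(
            ["git", "pull"], cwd=data_dir, check=True, capture_output=True, text=True
        )
        if "Already up to date." in pull.stdout:
            return False  # no new data, keep the existing pickle
        # The script has a shebang line, so it can be invoked directly.
        subprocess.run(
            [str(repo_root / "code" / "create_prerendered_figures.py")], check=True
        )
        return True
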
34 changes: 18 additions & 16 deletions climate_emotions_map/app.py
@@ -15,7 +15,11 @@
from dash.exceptions import PreventUpdate

from . import utility as utils
from .data_loader import NATIONAL_SAMPLE_SIZE, SURVEY_DATA
from .data_loader import (
NATIONAL_SAMPLE_SIZE,
PRERENDERED_BARPLOTS,
SURVEY_DATA,
)
from .layout import MAP_LAYOUT, SINGLE_SUBQUESTION_FIG_KW, construct_layout
from .make_descriptive_plots import make_descriptive_plots
from .make_map import make_map
@@ -307,25 +311,23 @@ def update_stacked_bar_plots(
show_all_responses_checked,
):
"""Update the stacked bar plots for all questions based on the selected criteria."""
if show_all_responses_checked:
threshold = None
elif not show_all_responses_checked:
threshold = DEFAULT_QUESTION["outcome"]

figure_lookup_key = (
state,
is_party_stratify_checked,
threshold,
NUM_DECIMALS,
)

figures = []
for output in ctx.outputs_list:
# Example: {'id': {'question': 'q2', 'type': 'stacked-bar-plot'}, 'property': 'figure'}
question = output["id"]["question"]

if show_all_responses_checked:
threshold = None
elif not show_all_responses_checked:
threshold = DEFAULT_QUESTION["outcome"]

figure = make_stacked_bar(
question=question,
subquestion="all",
state=state,
stratify=is_party_stratify_checked,
threshold=threshold,
decimals=NUM_DECIMALS,
)
figures.append(figure)
figures.append(PRERENDERED_BARPLOTS[figure_lookup_key][question])

return figures

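For context, the callback above replaces the per-question make_stacked_bar() call with a single dictionary lookup keyed on (state, stratify, threshold, decimals). A defensive variant, not in the PR, that falls back to on-the-fly rendering if a combination happens to be missing from the pickle:

    key = (state, is_party_stratify_checked, threshold, NUM_DECIMALS)
    figure = PRERENDERED_BARPLOTS.get(key, {}).get(question)
    if figure is None:
        # Combination not prerendered; render it on the fly as before.
        figure = make_stacked_bar(
            question=question,
            subquestion="all",
            state=state,
            stratify=is_party_stratify_checked,
            threshold=threshold,
            decimals=NUM_DECIMALS,
        )
    figures.append(figure)
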
25 changes: 22 additions & 3 deletions climate_emotions_map/data_loader.py
@@ -6,15 +6,18 @@
"""

import json
import pickle as pkl
from pathlib import Path

import pandas as pd

BASE_PATH = Path(__file__).parents[1]


def load_data_file(file: str) -> pd.DataFrame:
"""Load a TSV data file into a dataframe."""
return pd.read_csv(
Path(__file__).parents[1] / "data" / "survey_results" / file,
BASE_PATH / "data" / "survey_results" / file,
sep="\t",
dtype={"question": str, "sub_question": str, "outcome": str},
)
@@ -23,7 +26,7 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
def load_data_dictionary(file: str) -> pd.DataFrame:
"""Load a data dictionary TSV into a dataframe."""
return pd.read_csv(
Path(__file__).parents[1] / "data" / "data_dictionaries" / file,
BASE_PATH / "data" / "data_dictionaries" / file,
sep="\t",
# Some data dictionaries have "None" as a meaningful value, so we have to prevent it
# from being interpreted as a NaN by pandas
@@ -32,6 +35,21 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
)


def load_prerendered_figures(file: str) -> dict:
"""Load a pickle file containing a dictionary of prerendered plotly figures."""
target_file = BASE_PATH / "code/assets" / file
# Because this module always runs the loaders, even when imported by the create_prerendered_figures module
# we need to allow for the file to not exist yet when we want to run the script the first time
if not target_file.exists():
print(
"Prerendered figures not found. Run create_prerendered_figures.py to generate them."
)
return {}

print(f"Loading prerendered figures from {target_file}")
return pkl.load(target_file.open("rb"))


def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
"""Remove rows from a dataframe that have a value of TRUE in the "ignore" column."""
return df[df["ignore"] == False]
@@ -40,7 +58,7 @@ def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
def load_geojson_object(file: str) -> dict:
"""Load a geojson file into a dataframe."""
return json.loads(
(Path(__file__).parents[1] / "code" / "assets" / file).read_text(),
(BASE_PATH / "code" / "assets" / file).read_text(),
)


@@ -155,3 +173,4 @@ def get_domain_text() -> dict[str, str]:

NATIONAL_SAMPLE_SIZE = SURVEY_DATA["samplesizes_state.tsv"]["n"].sum()
GEOJSON_OBJECTS = load_geojson_objects()
PRERENDERED_BARPLOTS = load_prerendered_figures("prerendered_figures.pkl")
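
Because load_prerendered_figures() falls back to an empty dict when the pickle is missing (so that create_prerendered_figures.py can import this module before the file exists), downstream code that strictly requires the figures could guard against the empty case. A hypothetical sketch, not part of the PR:

    from climate_emotions_map.data_loader import PRERENDERED_BARPLOTS

    if not PRERENDERED_BARPLOTS:
        raise RuntimeError(
            "No prerendered figures found; run code/create_prerendered_figures.py first."
        )
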
13 changes: 4 additions & 9 deletions climate_emotions_map/layout.py
@@ -5,7 +5,7 @@
from dash import dcc, html

from . import utility as utils
from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT
from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT, PRERENDERED_BARPLOTS
from .make_descriptive_plots import make_descriptive_plots
from .make_map import make_map
from .make_stacked_bar_plots import make_stacked_bar
@@ -440,14 +440,9 @@ def create_bar_plots_for_question(question_id: str, subquestion_id: str):
"type": "stacked-bar-plot",
"question": question_id,
},
figure=make_stacked_bar(
question=question_id,
subquestion=subquestion_id,
state=None,
stratify=False,
threshold=DEFAULT_QUESTION["outcome"],
decimals=NUM_DECIMALS,
),
figure=PRERENDERED_BARPLOTS[
None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS
][question_id],
config=DCC_GRAPH_CONFIG,
),
w=1200,
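A small Python note on the lookup above: subscripting with a bare tuple is equivalent to subscripting with a parenthesized tuple, so PRERENDERED_BARPLOTS[None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS] hits the same four-element key that create_prerendered_figures.py writes. A toy illustration (key and value are stand-ins):

    lookup = {(None, False, "3+", 1): "some figure"}  # stand-in key and value
    assert lookup[None, False, "3+", 1] is lookup[(None, False, "3+", 1)]
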
Binary file added code/assets/prerendered_figures.pkl
Binary file not shown.
65 changes: 65 additions & 0 deletions code/create_prerendered_figures.py
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import pickle as pkl
import sys
from pathlib import Path

# Hacky hacky gets the job done for the next import
sys.path.append(str(Path(__file__).parent.parent))

from climate_emotions_map.make_stacked_bar_plots import ( # noqa
DATA_DICTIONARIES,
make_stacked_bar,
)
from climate_emotions_map.utility import DEFAULT_QUESTION, NUM_DECIMALS # noqa

UNIQUE_QUESTIONS = (
DATA_DICTIONARIES["question_dictionary.tsv"]["question"].unique().tolist()
)
UNIQUE_STATES = (
DATA_DICTIONARIES["state_abbreviations.tsv"]["state"].unique().tolist()
)
OUTPUT_FILE = Path(__file__).parents[0] / "assets/prerendered_figures.pkl"


def make_full_set_of_barplots(
state=None, stratify=None, threshold=None, decimals=NUM_DECIMALS
):
"""
This returns a dictionary for all questions where keys are question IDs
and values are the plotly graph object figure for each question.
"""
return {
question: make_stacked_bar(
question, "all", state, stratify, threshold, decimals
)
for question in UNIQUE_QUESTIONS
}


def make_all_figures():
"""
Iterate through all combinations of questions and states
to create the complete set of figures.

Returns a dictionary keyed on the tuple of (state, stratified, threshold) in that order
"""
figures = {}
# A state of None means we are looking at national level questions
for state in UNIQUE_STATES + [None]:
for stratify in [False, True]:
# For state level figures, we don't stratify by party
if state is not None and stratify:
continue
for threshold in [None, DEFAULT_QUESTION["outcome"]]:
key = (state, stratify, threshold, NUM_DECIMALS)
figures[key] = make_full_set_of_barplots(*key)
return figures


if __name__ == "__main__":
figures = make_all_figures()
with OUTPUT_FILE.open("wb") as f:
pkl.dump(figures, f)

print(f"Done prerendering figures to {OUTPUT_FILE}!")