
[ENH] Use pregenerated figures #96


Merged 14 commits on Oct 10, 2024
18 changes: 17 additions & 1 deletion .github/workflows/update_submodule.yml
Collaborator

Ah, I just realized that we should also install the Python dependencies to ensure that your nice script doesn't error out during the workflow 🤦‍♀️

I can't suggest in this part of the file, but I think

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt

after the checkout step should do the trick!

Contributor Author

Ah yes, good point

@@ -16,11 +16,27 @@ jobs:
submodules: recursive
token: ${{ secrets.CLIMATE_DB_DATA_PAT }}

- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt

- name: Update submodule
run: |
cd data
git switch main && git pull
git switch main
output=$(git pull)
echo "$output"
cd ..
if [[ "$output" != *"Already up to date."* ]]; then
./code/create_prerendered_figures.py
fi


- name: Create pull request
uses: peter-evans/create-pull-request@v7
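For illustration only (not part of the PR): the new workflow step regenerates the figures only when git pull reports fresh submodule commits. A rough Python sketch of the same check, mirroring the shell logic above and its reliance on git's "Already up to date." message:

    import subprocess
    from pathlib import Path


    def pull_submodule_and_rerender(repo_root: Path) -> bool:
        """Pull the data submodule; rerender figures only if new commits arrived."""
        data_dir = repo_root / "data"
        subprocess.run(["git", "switch", "main"], cwd=data_dir, check=True)
        pull = subprocess.run(
            ["git", "pull"], cwd=data_dir, check=True, capture_output=True, text=True
        )
        if "Already up to date." in pull.stdout:
            return False  # no new data, keep the existing pickle
        # The script has a shebang line, so it can be invoked directly.
        subprocess.run(
            [str(repo_root / "code" / "create_prerendered_figures.py")], check=True
        )
        return True
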
34 changes: 18 additions & 16 deletions climate_emotions_map/app.py
@@ -15,7 +15,11 @@
from dash.exceptions import PreventUpdate

from . import utility as utils
from .data_loader import NATIONAL_SAMPLE_SIZE, SURVEY_DATA
from .data_loader import (
NATIONAL_SAMPLE_SIZE,
PRERENDERED_BARPLOTS,
SURVEY_DATA,
)
from .layout import MAP_LAYOUT, SINGLE_SUBQUESTION_FIG_KW, construct_layout
from .make_descriptive_plots import make_descriptive_plots
from .make_map import make_map
@@ -307,25 +311,23 @@ def update_stacked_bar_plots(
show_all_responses_checked,
):
"""Update the stacked bar plots for all questions based on the selected criteria."""
if show_all_responses_checked:
threshold = None
elif not show_all_responses_checked:
threshold = DEFAULT_QUESTION["outcome"]

figure_lookup_key = (
state,
is_party_stratify_checked,
threshold,
NUM_DECIMALS,
)

figures = []
for output in ctx.outputs_list:
# Example: {'id': {'question': 'q2', 'type': 'stacked-bar-plot'}, 'property': 'figure'}
question = output["id"]["question"]

if show_all_responses_checked:
threshold = None
elif not show_all_responses_checked:
threshold = DEFAULT_QUESTION["outcome"]

figure = make_stacked_bar(
question=question,
subquestion="all",
state=state,
stratify=is_party_stratify_checked,
threshold=threshold,
decimals=NUM_DECIMALS,
)
figures.append(figure)
figures.append(PRERENDERED_BARPLOTS[figure_lookup_key][question])

return figures

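For context, the callback above replaces the per-question make_stacked_bar() call with a single dictionary lookup keyed on (state, stratify, threshold, decimals). A defensive variant, not in the PR, that falls back to on-the-fly rendering if a combination happens to be missing from the pickle:

    key = (state, is_party_stratify_checked, threshold, NUM_DECIMALS)
    figure = PRERENDERED_BARPLOTS.get(key, {}).get(question)
    if figure is None:
        # Combination not prerendered; render it on the fly as before.
        figure = make_stacked_bar(
            question=question,
            subquestion="all",
            state=state,
            stratify=is_party_stratify_checked,
            threshold=threshold,
            decimals=NUM_DECIMALS,
        )
    figures.append(figure)
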
25 changes: 22 additions & 3 deletions climate_emotions_map/data_loader.py
@@ -6,15 +6,18 @@
"""

import json
import pickle as pkl
from pathlib import Path

import pandas as pd

BASE_PATH = Path(__file__).parents[1]


def load_data_file(file: str) -> pd.DataFrame:
"""Load a TSV data file into a dataframe."""
return pd.read_csv(
Path(__file__).parents[1] / "data" / "survey_results" / file,
BASE_PATH / "data" / "survey_results" / file,
sep="\t",
dtype={"question": str, "sub_question": str, "outcome": str},
)
@@ -23,7 +26,7 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
def load_data_dictionary(file: str) -> pd.DataFrame:
"""Load a data dictionary TSV into a dataframe."""
return pd.read_csv(
Path(__file__).parents[1] / "data" / "data_dictionaries" / file,
BASE_PATH / "data" / "data_dictionaries" / file,
sep="\t",
# Some data dictionaries have "None" as a meaningful value, so we have to prevent it
# from being interpreted as a NaN by pandas
@@ -32,6 +35,21 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
)


def load_prerendered_figures(file: str) -> dict:
"""Load a pickle file containing a dictionary of prerendered plotly figures."""
target_file = BASE_PATH / "code/assets" / file
# Because this module always runs the loaders, even when imported by the create_prerendered_figures module
# we need to allow for the file to not exist yet when we want to run the script the first time
if not target_file.exists():
print(
"Prerendered figures not found. Run create_prerendered_figures.py to generate them."
)
return {}

print(f"Loading prerendered figures from {target_file}")
return pkl.load(target_file.open("rb"))


def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
"""Remove rows from a dataframe that have a value of TRUE in the "ignore" column."""
return df[df["ignore"] == False]
@@ -40,7 +58,7 @@ def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
def load_geojson_object(file: str) -> dict:
"""Load a geojson file into a dataframe."""
return json.loads(
(Path(__file__).parents[1] / "code" / "assets" / file).read_text(),
(BASE_PATH / "code" / "assets" / file).read_text(),
)


@@ -155,3 +173,4 @@ def get_domain_text() -> dict[str, str]:

NATIONAL_SAMPLE_SIZE = SURVEY_DATA["samplesizes_state.tsv"]["n"].sum()
GEOJSON_OBJECTS = load_geojson_objects()
PRERENDERED_BARPLOTS = load_prerendered_figures("prerendered_figures.pkl")
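
Because load_prerendered_figures() falls back to an empty dict when the pickle is missing (so that create_prerendered_figures.py can import this module before the file exists), downstream code that strictly requires the figures could guard against the empty case. A hypothetical sketch, not part of the PR:

    from climate_emotions_map.data_loader import PRERENDERED_BARPLOTS

    if not PRERENDERED_BARPLOTS:
        raise RuntimeError(
            "No prerendered figures found; run code/create_prerendered_figures.py first."
        )
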
13 changes: 4 additions & 9 deletions climate_emotions_map/layout.py
@@ -5,7 +5,7 @@
from dash import dcc, html

from . import utility as utils
from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT
from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT, PRERENDERED_BARPLOTS
from .make_descriptive_plots import make_descriptive_plots
from .make_map import make_map
from .make_stacked_bar_plots import make_stacked_bar
@@ -440,14 +440,9 @@ def create_bar_plots_for_question(question_id: str, subquestion_id: str):
"type": "stacked-bar-plot",
"question": question_id,
},
figure=make_stacked_bar(
question=question_id,
subquestion=subquestion_id,
state=None,
stratify=False,
threshold=DEFAULT_QUESTION["outcome"],
decimals=NUM_DECIMALS,
),
figure=PRERENDERED_BARPLOTS[
None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS
][question_id],
config=DCC_GRAPH_CONFIG,
),
w=1200,
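A small Python note on the lookup above: subscripting with a bare tuple is equivalent to subscripting with a parenthesized tuple, so PRERENDERED_BARPLOTS[None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS] hits the same four-element key that create_prerendered_figures.py writes. A toy illustration (key and value are stand-ins):

    lookup = {(None, False, "3+", 1): "some figure"}  # stand-in key and value
    assert lookup[None, False, "3+", 1] is lookup[(None, False, "3+", 1)]
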
Binary file added code/assets/prerendered_figures.pkl
Binary file not shown.
65 changes: 65 additions & 0 deletions code/create_prerendered_figures.py
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import pickle as pkl
import sys
from pathlib import Path

# Hacky hacky gets the job done for the next import
sys.path.append(str(Path(__file__).parent.parent))

from climate_emotions_map.make_stacked_bar_plots import ( # noqa
DATA_DICTIONARIES,
make_stacked_bar,
)
from climate_emotions_map.utility import DEFAULT_QUESTION, NUM_DECIMALS # noqa

UNIQUE_QUESTIONS = (
DATA_DICTIONARIES["question_dictionary.tsv"]["question"].unique().tolist()
)
UNIQUE_STATES = (
DATA_DICTIONARIES["state_abbreviations.tsv"]["state"].unique().tolist()
)
OUTPUT_FILE = Path(__file__).parents[0] / "assets/prerendered_figures.pkl"


def make_full_set_of_barplots(
state=None, stratify=None, threshold=None, decimals=NUM_DECIMALS
):
"""
This returns a dictionary for all questions where keys are question IDs
and values are the plotly graph object figure for each question.
"""
return {
question: make_stacked_bar(
question, "all", state, stratify, threshold, decimals
)
for question in UNIQUE_QUESTIONS
}


def make_all_figures():
"""
Iterate through all combinations of questions and states
to create the complete set of figures.

Returns a dictionary keyed on the tuple of (state, stratified, threshold) in that order
"""
figures = {}
# A state of None means we are looking at national level questions
for state in UNIQUE_STATES + [None]:
for stratify in [False, True]:
# For state level figures, we don't stratify by party
if state is not None and stratify:
continue
for threshold in [None, DEFAULT_QUESTION["outcome"]]:
key = (state, stratify, threshold, NUM_DECIMALS)
figures[key] = make_full_set_of_barplots(*key)
return figures


if __name__ == "__main__":
figures = make_all_figures()
with OUTPUT_FILE.open("wb") as f:
pkl.dump(figures, f)

print(f"Done prerendering figures to {OUTPUT_FILE}!")