📊 wpp: 2024 release (#2936)

* wip: wpp * meadow * snapshot * wip: wpp * separate estimates vs projections * mid-year population * add more indicators * read rates * growth rates * add new method * add projections * add kwargs * add fertility rates * ignore data * add fertility rate * migration (snapshot) * migration indicators * drop all-zero age group fertility * add deaths to snapshot * add death rate * add deaths * add metadata * add reminder * deaths per 1,000 people * change in unit: population -> people * add instructions * update data * deaths: add 10-year age groups * le, births, median-age * birth_rate, population_density * births * grapher: median_age * enable non-strict mode * grapher: births * grapher: population_density * grapher: life_expectancy * fix: population_density being ignored * grapher: life expectancy * fix LE * sex_ratio * fix death rate merge * fix infinit value * mortality_rates * grapher: mortalities * grapher: mortalities * ensure distinct titles * jinja typo * jinja typo * fix sex dimension * fix unit in migrants * harmonization typo in fertility * 0 -> at birth * 0 -> at birth * more: 0->at_birth * rollback at_birth -> 0 * remove unused code * population as integer * wip * wip: metadata * wip * mortality_rate * childbearing age, population doubling time * population doubling times, childbearing age * wip * typo dataset title * median age metadata * wip * remove unused file * 📊 wpp: 2024 release * dependency ratio * fix typo * fix spacing * wip: spacing * exclude extra income groups * json typo * ✨ explore_mode: fix tabs * remove cached tab * remove cached table in display * ✨ indicator-upgrader: add version to dataset name * wip * decimal places to median age * remove 'fertility' word in text * add transposed df view commented for future * remove linebreak * ✨ indicator upgrader: show all suggestions * option to show/not show all suggestions * change error metric (absolute) * add 10-19 age group for deaths * add 0-9 age group for deaths * fixes to metadata * 
missing note * change attribution * specify life expectancy type * remove unwanted whitespace
owid · Jul 11, 2024 · 74f8c90 · 74f8c90
1 parent 278241c
commit 74f8c90
Show file tree

Hide file tree

Showing 38 changed files with 2,980 additions and 5 deletions.
diff --git a/.gitignore b/.gitignore
@@ -53,3 +53,5 @@ site/
 
 
 notebooks/
+
+zpop/
diff --git a/apps/wizard/app_pages/indicator_upgrade/dataset_selection.py b/apps/wizard/app_pages/indicator_upgrade/dataset_selection.py
@@ -172,6 +172,12 @@ def _dataset_new_selectbox_on_change():
                 value=False,
                 on_change=set_states_if_form_is_modified,
             )
+            reduced_suggestions = st.toggle(
+                "Reduced list of suggestions",
+                help="",
+                value=False,
+                on_change=set_states_if_form_is_modified,
+            )
             similarity_name = st.selectbox(
                 label="Similarity matching function",
                 options=similarity_names,
@@ -210,6 +216,7 @@ def _dataset_new_selectbox_on_change():
         map_identical_indicators=map_identical,
         similarity_function_name=similarity_name,
         enable_bulk_explore=enable_bulk_explore,
+        complete_suggestions=not reduced_suggestions,
     )
 
 
@@ -232,6 +239,7 @@ class SearchConfigForm(BaseModel):
     map_identical_indicators: bool
     similarity_function_name: str
     enable_bulk_explore: bool
+    complete_suggestions: bool
 
     def __init__(self, **data: Any) -> None:
         """Constructor."""

diff --git a/apps/wizard/app_pages/indicator_upgrade/explore_mode.py b/apps/wizard/app_pages/indicator_upgrade/explore_mode.py
@@ -520,7 +520,8 @@ def st_show_country_overview(df_indicators: pd.DataFrame, indicator_old: str, in
                 indicator_new: "New",
             }
         )
-        error = df_[COLUMN_RELATIVE_ERROR].dropna()
+
+        error = df_[COLUMN_ABS_RELATIVE_ERROR].dropna()
         error_max = error.replace([np.inf, -np.inf], np.nan).abs().max()
         error = error.replace([np.inf], error_max)
         error = error.replace([-np.inf], -error_max)
@@ -541,7 +542,9 @@ def st_show_country_overview(df_indicators: pd.DataFrame, indicator_old: str, in
         data=df_countries,
         column_config={
             "error": st.column_config.LineChartColumn(
-                "Error(year)", help="Difference between old and new indicator values."
+                "Error(year)",
+                help="Difference between old and new indicator values.",
+                y_min=0,
             ),
             "old": st.column_config.LineChartColumn("Old"),
             "new": st.column_config.LineChartColumn("New"),

diff --git a/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py b/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py
@@ -54,7 +54,7 @@ def render_indicator_mapping(search_form) -> Dict[int, int]:
     else:
         with st.container(border=True):
             # 1/ Title, description, options
-            st_show_header()
+            st_show_header(search_form)
 
             # 2/ Map indicators
             # Show columns with indicators that were automatically (and manually) mapped
@@ -91,6 +91,7 @@ def _get_params_cached(
     map_identical_indicators,
     similarity_function_name,
     enable_bulk_explore,
+    complete_suggestions: bool = True,
 ):
     """Cached version of `get_params`.
 
@@ -120,6 +121,14 @@ def _get_params_cached(
 
     # 1.4/ Get remaining mapping suggestions
     # This is for those indicators which couldn't be automatically mapped
+    if complete_suggestions:
+        missing_new = new_indicators.rename(
+            columns={
+                "id": "id_new",
+                "name": "name_new",
+            }
+        )
+
     suggestions = find_mapping_suggestions_cached(
         missing_old=missing_old,
         missing_new=missing_new,
@@ -168,6 +177,7 @@ def get_params(search_form):
         search_form.map_identical_indicators,
         search_form.similarity_function_name,
         search_form.enable_bulk_explore,
+        search_form.complete_suggestions,
     )
 
     # Set states
@@ -176,12 +186,12 @@ def get_params(search_form):
     return iu, indicator_id_to_display, df_data
 
 
-def st_show_header():
+def st_show_header(search_form):
     """Show title, description, etc."""
     # Title
     st.header("Map indicators")
     st.markdown(
-        "Map indicators from the [Old dataset]({OWID_ENV.dataset_admin_site(search_form.dataset_new_id)}) to the [New dataset]({OWID_ENV.dataset_admin_site(search_form.dataset_new_id)}). The idea is that the indicators in the new dataset will replace those from the old dataset in our charts. You can choose to ignore some indicators if you want to.",
+        f"Map indicators from the [Old dataset]({OWID_ENV.dataset_admin_site(search_form.dataset_new_id)}) to the [New dataset]({OWID_ENV.dataset_admin_site(search_form.dataset_new_id)}). The idea is that the indicators in the new dataset will replace those from the old dataset in our charts. You can choose to ignore some indicators if you want to.",
     )
 
     # Row 1

diff --git a/dag/demography.yml b/dag/demography.yml
@@ -22,6 +22,50 @@ steps:
   data://grapher/demography/2023-03-31/population:
     - data://garden/demography/2023-03-31/population
 
+  # WPP (2024)
+  data-private://meadow/un/2024-07-11/un_wpp:
+    ## Population
+    - snapshot-private://un/2024-07-11/un_wpp_population.csv
+    - snapshot-private://un/2024-07-11/un_wpp_population_density.xlsx
+    ## Population doubling times
+    - snapshot-private://un/2024-07-11/un_wpp_population_doubling.xlsx
+    ## Growth
+    - snapshot-private://un/2024-07-11/un_wpp_growth_rate.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_nat_change_rate.xlsx
+    ## Fertility
+    - snapshot-private://un/2024-07-11/un_wpp_fert_rate_tot.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_fert_rate_age.xlsx
+    ## Migration
+    - snapshot-private://un/2024-07-11/un_wpp_migration.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_migration_rate.xlsx
+    ## Deaths
+    - snapshot-private://un/2024-07-11/un_wpp_deaths.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_deaths_age.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_deaths_age_fem.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_deaths_age_male.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_death_rate.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_child_mortality.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_infant_mortality.xlsx
+    ## Births
+    - snapshot-private://un/2024-07-11/un_wpp_births_age.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_births_sex.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_birth_rate.xlsx
+    ## Median age
+    - snapshot-private://un/2024-07-11/un_wpp_median_age.xlsx
+    ## Child bearing age
+    - snapshot-private://un/2024-07-11/un_wpp_childbearing_age.xlsx
+    ## Life expectancy
+    - snapshot-private://un/2024-07-11/un_wpp_le.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_le_f.xlsx
+    - snapshot-private://un/2024-07-11/un_wpp_le_m.xlsx
+
+  data-private://garden/un/2024-07-11/un_wpp:
+    - data-private://meadow/un/2024-07-11/un_wpp
+  data-private://grapher/un/2024-07-11/un_wpp:
+    - data-private://garden/un/2024-07-11/un_wpp
+  data-private://grapher/un/2024-07-11/un_wpp_full:
+    - data-private://garden/un/2024-07-11/un_wpp
+
   # Population (Fariss et al.)
   data://meadow/demography/2023-12-20/population_fariss:
     - snapshot://demography/2023-12-20/population_fariss.rds

diff --git a/etl/helpers.py b/etl/helpers.py
@@ -657,6 +657,12 @@ def load_snapshot(self, short_name: Optional[str] = None, **kwargs) -> Snapshot:
         assert isinstance(snap, Snapshot)
         return snap
 
+    def read_snap_table(self, short_name: Optional[str] = None, **kwargs) -> Table:
+        """Load a snapshot dependency and read it into a table.
+
+        Convenience wrapper: fetches the snapshot with `load_snapshot` and returns the result of
+        calling its `read` method.
+
+        Args:
+            short_name: Passed through to `load_snapshot`, which is documented to default it to the
+                current step's short_name.
+            **kwargs: Forwarded to `snap.read` only; they are not passed to `load_snapshot`.
+
+        Returns:
+            The value returned by `snap.read(**kwargs)` (annotated as `Table`).
+        """
+        snap = self.load_snapshot(short_name=short_name)
+        # Reader options go to `read`, not to the snapshot lookup above.
+        tb = snap.read(**kwargs)
+        return tb
+
     def load_dataset(
         self,
         short_name: Optional[str] = None,