From b609fc4af71669529c4582a7710b58d883f603e6 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 13 Aug 2025 19:58:24 +0100 Subject: [PATCH 01/16] Add script to generate descriptive plots for consumables impact manuscript - add heatmaps - add detailed table of average availability by consumable across scenarios --- .../descriptive_analysis.py | 372 ++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 src/scripts/consumables_analyses/descriptive_analysis.py diff --git a/src/scripts/consumables_analyses/descriptive_analysis.py b/src/scripts/consumables_analyses/descriptive_analysis.py new file mode 100644 index 0000000000..bf3528ea87 --- /dev/null +++ b/src/scripts/consumables_analyses/descriptive_analysis.py @@ -0,0 +1,372 @@ +''' +Produce plots and estimates for the manuscript "Estimating the health gains and value for money of reducing drug stock-outs in Malawi: an individual-based modelling study" +''' + +import datetime +import os +import textwrap +from pathlib import Path + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from typing import Literal, Sequence, Optional, Union, List + +from tlo import Date +from tlo.analysis.utils import ( + extract_params, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, +) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +consumable_resourcefilepath = resourcefilepath / "healthsystem/consumables" +simulationfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +outputfilepath = Path('./outputs/consumables_impact_analysis') +if not os.path.exists(outputfilepath): + os.makedirs(outputfilepath) + +# Utility functions +# Prepare availability data +def prepare_availability_dataset_for_plots( + _df: pd.DataFrame, + scenario_list: Optional[list[int]] = None, + scenario_names_dict: Optional[dict[str, str]] = None, + consumable_resourcefilepath: Path = None, + resourcefilepath: Path = None +) -> pd.DataFrame: + """ + Prepares a consumable availability dataset by merging facility and item category data, + renaming columns for scenarios, and cleaning category names for plotting. 
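+
+    A sketch of the intended call (it mirrors the invocation further below in
+    this script; the path variables are defined at the top of the file):
+    >>> tlo_availability_df = prepare_availability_dataset_for_plots(
+    ...     _df=tlo_availability_df,
+    ...     scenario_names_dict=scenario_names_dict,
+    ...     consumable_resourcefilepath=consumable_resourcefilepath,
+    ...     resourcefilepath=resourcefilepath,
+    ... )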
+ """ + if scenario_list is None: + scenario_list = [] + if scenario_names_dict is None: + scenario_names_dict = {} + + # Load item category mapping + program_item_mapping = pd.read_csv( + consumable_resourcefilepath / 'ResourceFile_Consumables_Item_Designations.csv', + usecols=['Item_Code', 'item_category'] + ) + program_item_mapping = program_item_mapping.rename(columns={'Item_Code': 'item_code'}) + program_item_mapping = program_item_mapping[program_item_mapping['item_category'].notna()] + + # Load facility list + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") + + # Merge facility and program info + _df = _df.merge( + mfl[['District', 'Facility_Level', 'Facility_ID']], + on='Facility_ID', how='left' + ) + _df = _df.merge(program_item_mapping, on='item_code', how='left') + + # Rename scenario columns + _df = _df.rename(columns=scenario_names_dict) + + # Clean item category names + clean_category_names = { + 'cancer': 'Cancer', + 'cardiometabolicdisorders': 'Cardiometabolic Disorders', + 'contraception': 'Contraception', + 'general': 'General', + 'hiv': 'HIV', + 'malaria': 'Malaria', + 'ncds': 'Non-communicable Diseases', + 'neonatal_health': 'Neonatal Health', + 'other_childhood_illnesses': 'Other Childhood Illnesses', + 'reproductive_health': 'Reproductive Health', + 'road_traffic_injuries': 'Road Traffic Injuries', + 'tb': 'Tuberculosis', + 'undernutrition': 'Undernutrition', + 'epi': 'Expanded programme on immunization' + } + _df['item_category'] = _df['item_category'].map(clean_category_names) + + return _df + +# Wrap Labels +def wrap_labels(labels, width=15): + """Wrap each label to the given character width.""" + return [textwrap.fill(str(lab), width) if lab is not None else "" for lab in labels] + +# Generate heatmaps of average availability +def generate_heatmap( + df: pd.DataFrame, + include_levels: Optional[List[str]] = None, + value_col: str = "Actual", + row: str = "item_category", + col: str = "Facility_Level", + row_order: Optional[Sequence[str]] = None, + col_order: Optional[Sequence[str]] = None, + figurespath: Optional[Path] = None, + filename: str = "heatmap_consumable_availability.png", + figsize: tuple[int, int] = (10, 8), + cmap: str = "RdYlGn", + annot: bool = True, + fmt: Optional[str] = None, # None -> auto choose + font_scale: float = 0.9, + cbar_label: str = "Proportion of days on which consumable is available", + xlabel: Optional[str] = None, + ylabel: Optional[str] = None, + as_pct: bool = True, # format annotations as percentages + round_decimals: int = 2, + # option to plot scenarios on the x_axis + scenario_axis: bool = False, # if True, columns become scenarios + scenario_cols: Optional[Sequence[str]] = None, +): + """ + Build a heatmap either by a single column (e.g., Facility_Level) or across multiple + scenario columns placed on the x-axis. 
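+
+    A sketch of a typical call (it mirrors the Figure 1 invocation further
+    below; the returned frame holds the plotted means):
+    >>> fig, ax, heatmap_df = generate_heatmap(
+    ...     df=tlo_availability_df, value_col="Actual",
+    ...     row="item_category", col="Facility_Level",
+    ... )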
+    """
+    if include_levels is not None:
+        df = df[df.Facility_Level.isin(include_levels)]
+    if scenario_axis:
+        aggregated = (
+            df.groupby([row], dropna=True)[scenario_cols]
+            .mean()
+            .reset_index()
+        )
+        # Add perfect scenario
+        aggregated['Perfect'] = 1  # Add a column representing the perfect scenario
+        heatmap_df = aggregated.set_index(row)
+    else:
+        # Standard mode: columns = `col`, values = mean(value_col)
+        aggregated = (
+            df.groupby([row, col], dropna=True)[value_col]
+            .mean()
+            .reset_index()
+        )
+        heatmap_df = aggregated.pivot(index=row, columns=col, values=value_col)
+
+    # Optional ordering
+    if row_order is not None:
+        heatmap_df = heatmap_df.reindex(row_order)
+    if col_order is not None:
+        heatmap_df = heatmap_df.reindex(columns=col_order)
+
+    # 2) Totals (means across the raw data, not the pivot means)
+    if scenario_axis:
+        # Mean per scenario column, averaged across programs
+        row_means = heatmap_df.mean(axis=0)  # average per scenario across programs
+        avg_row = row_means.copy()
+        heatmap_df.loc["Average"] = avg_row
+    else:
+        # Compute from raw df to avoid double-averaging
+        col_means = df.groupby(row, dropna=False)[value_col].mean()
+        row_means = df.groupby(col, dropna=False)[value_col].mean()
+        overall_mean = df[value_col].mean()
+
+        heatmap_df["Average"] = col_means.reindex(heatmap_df.index)
+        avg_row = row_means.reindex(heatmap_df.columns).copy()
+        avg_row.loc["Average"] = overall_mean
+        heatmap_df.loc["Average"] = avg_row
+
+    # 3) Annotation formatting
+    if as_pct:
+        # If values are 0–1 proportions, annotate as percentages
+        display_df = (heatmap_df * 100).round(round_decimals)
+        if fmt is None:
+            fmt = ".0f" if round_decimals == 0 else f".{round_decimals}f"
+        annot_data = display_df.astype(float)
+    else:
+        display_df = heatmap_df.round(round_decimals)
+        if fmt is None:
+            fmt = f".{round_decimals}f"
+        annot_data = display_df.astype(float)
+
+    # 4) Plot
+    sns.set(font_scale=font_scale)
+    fig, ax = plt.subplots(figsize=figsize)
+    hm = sns.heatmap(
+        annot_data,
+        annot=annot,
+        cmap=cmap,
+        cbar_kws={"label": cbar_label},
+        ax=ax,
+        fmt=fmt,  # use the format string chosen above rather than a hard-coded ".2f"
+    )
+
+    # If percentage labels requested, overwrite text with % suffix
+    if annot and as_pct:
+        for t in ax.texts:
+            t.set_text(f"{t.get_text()}%")
+
+    # 5) Labels & ticks
+    xlab = (xlabel or ("Scenario" if scenario_axis else col.replace("_", " ").title()))
+    ylab = (ylabel or row.replace("_", " ").title())
+    ax.set_xlabel(xlab)
+    ax.set_ylabel(ylab)
+    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
+    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
+
+    # 6) Save (optional)
+    if figurespath is not None:
+        figurespath = Path(figurespath)
+        figurespath.mkdir(parents=True, exist_ok=True)
+        outpath = figurespath / filename
+        plt.savefig(outpath, dpi=300, bbox_inches="tight")
+
+    plt.show()
+    plt.close(fig)
+    return fig, ax, heatmap_df
+
+
+# Import and clean data files
+#**********************************
+# Import TLO model availability data
+tlo_availability_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_availability_small.csv")
+scenario_names_dict={
+    'available_prop': 'Actual',
+    'available_prop_scenario1': 'Non-therapeutic consumables',
+    'available_prop_scenario2': 'Vital medicines',
+    'available_prop_scenario3': 'Pharmacist- managed',
+    'available_prop_scenario4': 'Level 1b',
+    'available_prop_scenario5': 'CHAM',
+    'available_prop_scenario6': '75th percentile facility',
+
'available_prop_scenario7': '90th percentile facility', + 'available_prop_scenario8': 'Best facility', + 'available_prop_scenario9': 'Best facility (including DHO)', + 'available_prop_scenario10': 'HIV supply chain', + 'available_prop_scenario11': 'EPI supply chain', + 'available_prop_scenario12': 'HIV moved to Govt supply chain (Avg by Level)', + 'available_prop_scenario13': 'HIV moved to Govt supply chain (Avg by Facility_ID)', + 'available_prop_scenario14': 'HIV moved to Govt supply chain (Avg by Facility_ID times 1.25)', + 'available_prop_scenario15': 'HIV moved to Govt supply chain (Avg by Facility_ID times 0.75)' + } + +tlo_availability_df = prepare_availability_dataset_for_plots( + _df=tlo_availability_df, + scenario_list=[1, 2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15], + scenario_names_dict=scenario_names_dict, + consumable_resourcefilepath=consumable_resourcefilepath, + resourcefilepath=resourcefilepath +) + +# Generate figures for manuscript +#********************************** +# Figure 1: Average probability of consumable availability in public and CHAM health facilities in Malawi +_ = generate_heatmap( + df=tlo_availability_df, + value_col="Actual", + row="item_category", + col="Facility_Level", + figurespath = outputfilepath / 'manuscript', + filename="heatmap_program_and_level_actual.png", + figsize=(10, 8), + cmap="RdYlGn", + round_decimals=4, + cbar_label="Proportion of days on which consumable is available", + xlabel="Facility Level", + ylabel="Program", +) + +# Figure 3: Comparison of consumable availability across modelled scenarios +scenario_cols = ['Actual', 'Non-therapeutic consumables', 'Vital medicines', 'Pharmacist- managed','75th percentile facility', '90th percentile facility', 'Best facility'] +for level in ['1a', '1b']: + _ = generate_heatmap( + df=tlo_availability_df, + include_levels = [level], + value_col="Actual", + row="item_category", + col="Facility_Level", + figurespath=outputfilepath / 'manuscript', + filename=f"scenarios_heatmap_{level}.png", + figsize=(10, 8), + cmap="RdYlGn", + round_decimals=4, + cbar_label="Proportion of days on which consumable is available", + xlabel="Facility Level", + ylabel="Program", + scenario_axis = True, # if True, columns become scenarios + scenario_cols = scenario_cols, + ) + + +# Figure A.1: Trend in average consumable availability by facility level + +# Figure A.2: Comparison of consumable availability as per Open Logistics Management Information System (OpenLMIS), 2018 and Harmonised +# Health Facility Assessment, 2018-19 + +# Table B.1: Average probability of availability for each consumable under all scenarios (Level 1a) +def assign_consumable_names_to_item_codes(df): + # Create dictionary mapping item_codes to consumables names + consumables_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_Items_and_Packages.csv")[['Item_Code', 'Items']] + consumables_df = consumables_df[consumables_df['Item_Code'].notna()] + consumables_dict = dict(zip(consumables_df['Item_Code'], consumables_df['Items'])) + + # Add consumable_name to df + df = df.copy() + df['item_name'] = df['item_code'].map(consumables_dict) + + return df +tlo_availability_df = assign_consumable_names_to_item_codes(tlo_availability_df) + +def generate_detail_availability_table(df, + groupby_var, + longtable = False, + figurespath=outputfilepath / 'appendix', + decimals=2 +): + table_df = df.copy() + table_df[groupby_var] = table_df[groupby_var].replace('_', ' ', regex=True) + table_df[groupby_var] = table_df[groupby_var].replace('%', r'\%', 
regex=True) + table_df[groupby_var] = table_df[groupby_var].replace('&', r'\&', regex=True) + + table_df = table_df.groupby(['item_name'])[scenario_cols].mean() + # Multiply by 100 and format with escaped percent sign for LaTeX + table_df[scenario_cols] = table_df[scenario_cols].applymap( + lambda x: f"{x * 100:.{decimals}f}\\%" + ) + + # Rename columns for clarity + table_df = table_df.reset_index() + table_df.columns = ['Consumable'] + scenario_cols + + # Rename columns + table_df.rename(columns={'item_name': 'Consumable'}, inplace=True) + + # Convert to LaTeX + latex_table = table_df.to_latex( + longtable=longtable, + column_format='|R{4cm}|' + '|'.join(['R{1cm}'] * len(table_df.columns[1:])) + '|', + caption=f"Summarized availability by consumable", + label=f"tab:availability_by_{groupby_var}", + position="h", + index=False, + escape=False, # we already escaped % and & + header=True + ) + + # Add \hline after each row + latex_table = latex_table.replace("\\\\", "\\\\ \\hline") + + # Save + figurespath.mkdir(parents=True, exist_ok=True) + latex_file_path = figurespath / f'availability_by_{groupby_var}.tex' + with open(latex_file_path, 'w') as latex_file: + latex_file.write(latex_table) + + # Print for reference + print(latex_table) + +# Table F1: Cost by cost subcategory +generate_detail_availability_table(df = tlo_availability_df, + groupby_var = 'item_name', + longtable = True, + figurespath=outputfilepath / 'appendix') + +# Table B.2: Average probability of availability for each consumable under all scenarios (Level 1b) + From 46a0cb9c0ccae8cb992c1aa258b0f126a4ddc932 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 20 Aug 2025 15:20:31 +0100 Subject: [PATCH 02/16] Update detailed tables on consumable availability by scenario --- .../descriptive_analysis.py | 204 ++++++++++++------ 1 file changed, 139 insertions(+), 65 deletions(-) diff --git a/src/scripts/consumables_analyses/descriptive_analysis.py b/src/scripts/consumables_analyses/descriptive_analysis.py index bf3528ea87..61780bf128 100644 --- a/src/scripts/consumables_analyses/descriptive_analysis.py +++ b/src/scripts/consumables_analyses/descriptive_analysis.py @@ -35,6 +35,19 @@ os.makedirs(outputfilepath) # Utility functions +# Assign item names to item codes +def assign_consumable_names_to_item_codes(df): + # Create dictionary mapping item_codes to consumables names + consumables_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_Items_and_Packages.csv")[['Item_Code', 'Items']] + consumables_df = consumables_df[consumables_df['Item_Code'].notna()] + consumables_dict = dict(zip(consumables_df['Item_Code'], consumables_df['Items'])) + + # Add consumable_name to df + df = df.copy() + df['item_name'] = df['item_code'].map(consumables_dict) + + return df + # Prepare availability data def prepare_availability_dataset_for_plots( _df: pd.DataFrame, @@ -223,6 +236,111 @@ def generate_heatmap( plt.close(fig) return fig, ax, heatmap_df +# Generate LaTex-compatible detailed table of availability +def generate_detail_availability_table_by_scenario( + df: pd.DataFrame, + groupby_var: str, + scenario_cols: Sequence[str], + include_levels: Union[str, Sequence[str]], + longtable: bool = False, + outputpath: Path = None, + decimals: int = 2, + caption: Optional[str] = None, + label_prefix: str = "tab:availability_by_", + col_width_groupbyvar: str = "4cm", + col_width_scenario: str = "1.8cm", +) -> str: + """ + Create a LaTeX table of average availability (as percentages) for each consumable across scenarios, + 
+    filtered to selected facility levels.
+
+    Returns the LaTeX string and writes it to
+    outputpath / f"availability_by_{groupby_var}_{levels}.tex", where `levels`
+    is the selected facility level(s) joined by underscores.
+    """
+
+    # --- Setup ---
+    if outputpath is None:
+        outputpath = outputfilepath / "appendix"  # fall back to the script-wide default
+    outputpath.mkdir(parents=True, exist_ok=True)
+
+    # Accept str OR list for include_levels
+    if isinstance(include_levels, str):
+        include_levels = [include_levels]
+    levels_tag = "_".join(include_levels)  # e.g. "1a" or "1a_1b"; used in the label and filename
+
+    table_df = df.copy()
+
+    # Filter by facility level if the column is present
+    if "Facility_Level" in table_df.columns:
+        table_df = table_df[table_df["Facility_Level"].isin(include_levels)]
+
+    # Aggregate means per item
+    grouped = (
+        table_df
+        .groupby([groupby_var], dropna=False)[list(scenario_cols)]
+        .mean()
+        .reset_index()
+    )
+
+    # Rename first column to "Consumable"
+    grouped.rename(columns={groupby_var: "Consumable"}, inplace=True)

+    # Escape LaTeX in Consumable names
+    def _latex_escape(s):
+        if pd.isna(s):
+            return ""
+        s = str(s)
+        # Order matters for backslashes; escape backslash first
+        s = s.replace("\\", r"\\")
+        s = s.replace("&", r"\&").replace("%", r"\%").replace("_", r"\_")
+        s = s.replace("#", r"\#").replace("{", r"\{").replace("}", r"\}")
+        s = s.replace("$", r"\$").replace("^", r"\^{}").replace("~", r"\~{}")
+        return s
+
+    grouped["Consumable"] = grouped["Consumable"].map(_latex_escape)
+
+    # Convert proportions -> percentage strings with escaped %
+    def pct_format(x: float) -> str:
+        if pd.isna(x):
+            return ""
+        return f"{x * 100:.{decimals}f}\\%"
+
+    for c in scenario_cols:
+        grouped[c] = grouped[c].map(pct_format)
+
+    # Build column format dynamically
+    # First col wider for names, then one col per scenario
+    column_format = (
+        f"|R{{{col_width_groupbyvar}}}|"
+        + "|".join([f"R{{{col_width_scenario}}}"] * len(scenario_cols))
+        + "|"
+    )
+
+    # Caption/label
+    if caption is None:
+        caption = "Summarized availability by consumable"
+    label = f"{label_prefix}{groupby_var}_{levels_tag}"
+
+    # Export to LaTeX (escape=False since we already escaped)
+    latex_table = grouped.to_latex(
+        longtable=longtable,
+        column_format=column_format,
+        caption=caption,
+        label=label,
+        position="h",
+        index=False,
+        escape=False,
+        header=True,
+    )
+
+    # Add \hline after each row
+    latex_table = latex_table.replace("\\\\\n", "\\\\ \\hline\n")
+
+    # Save
+    outpath = outputpath / f"availability_by_{groupby_var}_{levels_tag}.tex"
+    with open(outpath, "w", encoding="utf-8") as f:
+        f.write(latex_table)
+
+    return latex_table
+

 # Import and clean data files
 #**********************************
@@ -301,72 +419,28 @@ def generate_heatmap(
 # Health Facility Assessment, 2018-19

 # Table B.1: Average probability of availability for each consumable under all scenarios (Level 1a)
-def assign_consumable_names_to_item_codes(df):
-    # Create dictionary mapping item_codes to consumables names
-    consumables_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_Items_and_Packages.csv")[['Item_Code', 'Items']]
-    consumables_df = consumables_df[consumables_df['Item_Code'].notna()]
-    consumables_dict = dict(zip(consumables_df['Item_Code'], consumables_df['Items']))
-
-    # Add consumable_name to df
-    df = df.copy()
-    df['item_name'] = df['item_code'].map(consumables_dict)
-
-    return df
 tlo_availability_df = assign_consumable_names_to_item_codes(tlo_availability_df)

-def generate_detail_availability_table(df,
-                               groupby_var,
-                               longtable = False,
-                               figurespath=outputfilepath / 'appendix',
-                               decimals=2
-):
-    table_df = df.copy()
-    table_df[groupby_var] = table_df[groupby_var].replace('_', ' ', regex=True)
-    table_df[groupby_var] = table_df[groupby_var].replace('%', r'\%', regex=True)
-    table_df[groupby_var] = table_df[groupby_var].replace('&', r'\&', regex=True)
-
-    table_df = table_df.groupby(['item_name'])[scenario_cols].mean()
-    # Multiply by 100 and format with escaped percent sign for LaTeX
-    table_df[scenario_cols] = table_df[scenario_cols].applymap(
-        lambda x: f"{x * 100:.{decimals}f}\\%"
-    )
-
-    # Rename columns for clarity
-    table_df = table_df.reset_index()
-    table_df.columns = ['Consumable'] + scenario_cols
-
-    # Rename columns
-    table_df.rename(columns={'item_name': 'Consumable'}, inplace=True)
-
-    # Convert to LaTeX
-    latex_table = table_df.to_latex(
-        longtable=longtable,
-        column_format='|R{4cm}|' + '|'.join(['R{1cm}'] * len(table_df.columns[1:])) + '|',
-        caption=f"Summarized availability by consumable",
-        label=f"tab:availability_by_{groupby_var}",
-        position="h",
-        index=False,
-        escape=False,  # we already escaped % and &
-        header=True
-    )
-
-    # Add \hline after each row
-    latex_table = latex_table.replace("\\\\", "\\\\ \\hline")
-
-    # Save
-    figurespath.mkdir(parents=True, exist_ok=True)
-    latex_file_path = figurespath / f'availability_by_{groupby_var}.tex'
-    with open(latex_file_path, 'w') as latex_file:
-        latex_file.write(latex_table)
-
-    # Print for reference
-    print(latex_table)
-
-# Table F1: Cost by cost subcategory
-generate_detail_availability_table(df = tlo_availability_df,
-                               groupby_var = 'item_name',
-                               longtable = True,
-                               figurespath=outputfilepath / 'appendix')
+availability_by_item_1a = generate_detail_availability_table_by_scenario(
+    df=tlo_availability_df,
+    groupby_var="item_name",
+    scenario_cols=scenario_cols,
+    include_levels="1a",
+    longtable=True,
+    outputpath=outputfilepath / "appendix",
+    decimals=2,
+    caption="Average probability of availability for each consumable under all scenarios (Level 1a)",
+)

 # Table B.2: Average probability of availability for each consumable under all scenarios (Level 1b)
-
+availability_by_item_1b = generate_detail_availability_table_by_scenario(
+    df=tlo_availability_df,
+    groupby_var="item_name",
+    scenario_cols=scenario_cols,
+    include_levels="1b",
+    longtable=True,
+    outputpath=outputfilepath / "appendix",
+    decimals=2,
+    caption="Average probability of availability for each consumable under all scenarios (Level 1b)",
+)

From 53b030362c720ea6333d60cacc33abae45003cb2 Mon Sep 17 00:00:00 2001
From: sakshimohan
Date: Wed, 10 Sep 2025 11:57:44 +0700
Subject: [PATCH 03/16] Generate the first version of optimisation of
 consumable (re)distribution

---
 ...ate_consumable_redistribution_scenarios.py | 705 ++++++++++++++++++
 1 file changed, 705 insertions(+)
 create mode 100644 src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py

diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py
new file mode 100644
index 0000000000..cd3680556d
--- /dev/null
+++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py
@@ -0,0 +1,705 @@
+import datetime
+from pathlib import Path
+import pickle
+
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from typing import Literal, Optional, Dict, Tuple
+import requests
+
+from pulp import LpProblem, LpMaximize, LpVariable, LpBinary, LpStatus, value, lpSum, LpContinuous
+
+# define a timestamp for script outputs
+timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")
+
+# print the start time of the script
+print('Script Start', datetime.datetime.now().strftime('%H:%M'))
+
+# define folder pathways
+outputfilepath = Path("./outputs/consumables_impact_analysis")
+resourcefilepath = Path("./resources")
+path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables"
+# Set local shared drive source
+path_to_share = Path(  # <-- point to the shared folder
+    '/Users/sm2511/CloudStorage/OneDrive-SharedLibraries-ImperialCollegeLondon/TLOModel - WP - Documents/'
+)
+
+# 1. Import and clean data files
+#**********************************
+# Import cleaned OpenLMIS data from 2018
+lmis = (pd.read_csv(outputfilepath / "ResourceFile_Consumables_availability_and_usage.csv")
+        [['district', 'fac_type_tlo', 'fac_name', 'month', 'item_code', 'available_prop',
+          'closing_bal', 'amc', 'dispensed', 'received']])
+# Import data on facility location
+location = (pd.read_excel(path_to_share / "07 - Data/Facility_GPS_Coordinates/gis_data_for_openlmis/LMISFacilityLocations_raw.xlsx")
+            [['LMIS Facility List', 'LATITUDE', 'LONGITUDE']])
+# Find duplicates in facility names in the location dataset
+duplicates = location[location['LMIS Facility List'].duplicated(keep=False)]
+location = location.drop(duplicates[duplicates['LATITUDE'].isna()].index).reset_index(drop=True)  # Drop those duplicates whose location is missing
+# Import ownership data
+ownership = (pd.read_csv(path_to_share / "07 - Data/Consumables data/OpenLMIS/lmis_facility_ownership.csv"))[['fac_name', 'fac_owner']]
+ownership = ownership.drop_duplicates(subset=['fac_name'])
+
+# Merge OpenLMIS data with the location and ownership data
+lmis = lmis.merge(location, left_on='fac_name', right_on = 'LMIS Facility List', how = 'left', validate='m:1')
+lmis = lmis.merge(ownership, on='fac_name', how = 'left', validate='m:1')
+lmis.rename(columns = {'LATITUDE':'lat', 'LONGITUDE':'long', 'fac_type_tlo': 'Facility_Level'}, inplace = True)
+
+# Clean the month and facility-level columns to match the format of the consumable availability ResourceFile in the TLO model
+month_map = {
+    "January": 1, "February": 2, "March": 3, "April": 4,
+    "May": 5, "June": 6, "July": 7, "August": 8,
+    "September": 9, "October": 10, "November": 11, "December": 12
+}
+lmis["month"] = lmis["month"].map(month_map)
+lmis["Facility_Level"] = lmis["Facility_Level"].str.replace("Facility_level_", "", regex=False)
+
+# Keep only those facilities whose location is available
+lmis = lmis[lmis.lat.notna()]
+# TODO assume something else about the location of facilities with missing coordinates - e.g. the centroid of their district (see the sketch below)
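+# A minimal sketch of that centroid imputation (hypothetical; it would have to
+# run *before* the dropna filter above, so it is left commented out here):
+# district_centroids = lmis.groupby('district')[['lat', 'long']].transform('mean')
+# lmis[['lat', 'long']] = lmis[['lat', 'long']].fillna(district_centroids)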
+# only 16 facilities have missing location information
+
+# For now, let's keep only 8 facilities in 1 district
+#lmis = lmis[lmis.district == 'Balaka']
+# Keep only facilities with Facility_Level == '1a'
+#lmis = lmis[lmis["Facility_Level"] == '1a']
+
+# -----------------------------------
+# 1) Estimate travel time matrix
+# -----------------------------------
+def _chunk_indices(n: int, chunk: int):
+    """Yield (start, end) index pairs for chunking 0..n-1."""
+    for s in range(0, n, chunk):
+        e = min(n, s + chunk)
+        yield s, e
+
+def build_travel_time_matrix(
+    fac_df: pd.DataFrame,
+    *,
+    id_col: str = "fac_name",
+    lat_col: str = "lat",
+    lon_col: str = "long",
+    mode: Literal["car", "bicycle"] = "car",
+    backend: Literal["ors", "osrm"] = "ors",
+    # ORS options
+    ors_api_key: Optional[str] = None,
+    ors_base_url: str = "https://api.openrouteservice.org/v2/matrix",
+    # OSRM options (self-hosted or public; note: public often has only 'car')
+    osrm_base_url: str = "https://router.project-osrm.org",
+    osrm_profile_map: Optional[dict] = None,
+    # matrix request chunking (keeps requests within API limits)
+    max_chunk: int = 40,
+    timeout: int = 60,
+) -> pd.DataFrame:
+    """
+    Build an NxN *road* travel-time matrix (minutes) for facilities, by car or bicycle.
+
+    backends:
+      - 'ors'  -> uses OpenRouteService Matrix API (profiles: driving-car, cycling-regular).
+                  Requires ors_api_key. Has rate/size limits; we auto-chunk.
+      - 'osrm' -> uses OSRM 'table' service (profiles typically 'car' or 'bike').
+                  For bicycle, you'll likely need a self-hosted OSRM with the bicycle profile.
+
+    Parameters
+    ----------
+    mode : 'car' | 'bicycle'
+        Travel mode on roads.
+    max_chunk : int
+        Max #origins (and #destinations) per sub-matrix request to keep within API limits.
+
+    Returns
+    -------
+    pd.DataFrame
+        Square DataFrame (minutes), index/columns = facility names.
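+
+    A sketch of a typical call (facility names are illustrative; the public
+    OSRM demo server generally supports the car profile only):
+    >>> T = build_travel_time_matrix(fac_coords, mode="car", backend="osrm")
+    >>> T.loc["Facility A", "Facility B"]  # minutes by road, inf if unroutable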
+ """ + facs = fac_df[[id_col, lat_col, lon_col]].dropna().drop_duplicates().reset_index(drop=True) + ids = facs[id_col].tolist() + lats = facs[lat_col].to_numpy() + lons = facs[lon_col].to_numpy() + n = len(ids) + + T = pd.DataFrame(np.full((n, n), np.nan, dtype=float), index=ids, columns=ids) + np.fill_diagonal(T.values, 0.0) + + if n == 0: + return T + + if backend == "ors": + if ors_api_key is None: + raise ValueError("OpenRouteService requires ors_api_key.") + profile = "driving-car" if mode == "car" else "cycling-regular" + + # ORS expects [lon, lat] + coords = [[float(lons[i]), float(lats[i])] for i in range(n)] + + headers = {"Authorization": ors_api_key, "Content-Type": "application/json"} + + # Chunk both sources and destinations to stay under limits + for si, sj in _chunk_indices(n, max_chunk): + for di, dj in _chunk_indices(n, max_chunk): + sources = list(range(si, sj)) + destinations = list(range(di, dj)) + + body = { + "locations": coords, + "sources": sources, + "destinations": destinations, + "metrics": ["duration"], + } + url = f"{ors_base_url}/{profile}" + r = requests.post(url, json=body, headers=headers, timeout=timeout) + r.raise_for_status() + data = r.json() + + # durations in seconds; fill submatrix + durs = data.get("durations") + if durs is None: + raise RuntimeError(f"ORS returned no durations for block {si}:{sj} x {di}:{dj}") + sub = (np.array(durs, dtype=float) / 60.0) # minutes + T.iloc[si:sj, di:dj] = sub + + elif backend == "osrm": + # Map desired mode to OSRM profile names + if osrm_profile_map is None: + osrm_profile_map = {"car": "car", "bicycle": "bike"} + profile = osrm_profile_map.get(mode) + if profile is None: + raise ValueError(f"No OSRM profile mapped for mode='{mode}'.") + + # NOTE: The public OSRM demo often supports *car only*. + # For bicycle, run your own OSRM with the bicycle profile. + # We use the OSRM 'table' service; POST with 'sources' and 'destinations' indices. + + coords = ";".join([f"{lons[i]},{lats[i]}" for i in range(n)]) # lon,lat + + for si, sj in _chunk_indices(n, max_chunk): + for di, dj in _chunk_indices(n, max_chunk): + sources = ";".join(map(str, range(si, sj))) + destinations = ";".join(map(str, range(di, dj))) + + url = ( + f"{osrm_base_url}/table/v1/{profile}/{coords}" + f"?sources={sources}&destinations={destinations}&annotations=duration" + ) + r = requests.get(url, timeout=timeout) + r.raise_for_status() + data = r.json() + + durs = data.get("durations") + if durs is None: + raise RuntimeError(f"OSRM returned no durations for block {si}:{sj} x {di}:{dj}") + sub = (np.array(durs, dtype=float) / 60.0) # minutes + T.iloc[si:sj, di:dj] = sub + + else: + raise ValueError("backend must be 'ors' or 'osrm'.") + + # Safety: replace any residual NaNs (unroutable pairs) with inf or a large number + T = T.fillna(np.inf) + return T + +# Because ORS and ORSM can only handle a limited number of elements at a time, it's better to run this by district +# each of which already has under 50 facilities +def build_time_matrices_by_district( + df: pd.DataFrame, + *, + district_col: str = "district", + id_col: str = "fac_name", + lat_col: str = "lat", + lon_col: str = "long", + mode: str = "car", + backend: str = "osrm", + osrm_base_url: str = "https://router.project-osrm.org", + ors_api_key: str | None = None, + max_chunk: int = 50, # safe for both OSRM/ORS +) -> dict[str, pd.DataFrame]: + """ + Returns a dict: {district -> square minutes matrix DataFrame}, computed within each district only. 
+ """ + matrices = {} + # unique facilities per district (drop duplicates in case of repeated rows) + fac_cols = [district_col, id_col, lat_col, lon_col] + fac_coords = df[fac_cols].dropna().drop_duplicates() + + for d, facs_d in fac_coords.groupby(district_col): + # Skip tiny groups + if len(facs_d) < 2: + continue + + T = build_travel_time_matrix( + fac_df=facs_d[[id_col, lat_col, lon_col]], + id_col=id_col, lat_col=lat_col, lon_col=lon_col, + mode=mode, backend=backend, + osrm_base_url=osrm_base_url, + ors_api_key=ors_api_key, + max_chunk=max_chunk, + ) + matrices[d] = T + + return matrices + +# ----------------------------------------------- +# 2) Data prep for redistribution linear program +# ----------------------------------------------- +def compute_opening_balance(df: pd.DataFrame) -> pd.Series: + """ + Opening balance from same-month records: + OB = closing_bal - received + dispensed. + This is equivalent to OB_(m) = CB_(m-1) + """ + return df["closing_bal"] - df["received"] + df["dispensed"] + + +def presumed_availability(ob, amc, eps=1e-9) -> float: + """ + Presumed likelihood of cons availability = p = min(1, OB/AMC) at month start (no additional receipts considered + at this point in time). + """ + return float(min(1.0, max(0.0, (ob / max(eps, amc))))) + + +def build_edges_within_radius( + time_matrix: pd.DataFrame, + max_minutes: float +) -> Dict[str, set]: + """ + For each facility g, return set of receivers f such that T[g,f] <= max_minutes, f != g. + """ + edges = {} + for g in time_matrix.index: + feasible = set(time_matrix.columns[(time_matrix.loc[g] <= max_minutes) & (time_matrix.columns != g)]) + edges[g] = feasible + return edges + +def build_edges_within_radius_flat(T_by_dist: dict, max_minutes: float) -> dict[str, set[str]]: + """ + Takes the district-wise dictionary of time travel matrices and converts it into a flat dictionary of facilities + and their edge neighbours depending on the maximum allowable travel distance. + T_by_dist: {district -> square DataFrame of minutes (index/cols = facility IDs)} + Returns: {facility_id -> set(of facility_ids)} for all districts combined. + """ + edges: dict[str, set[str]] = {} + for _, T in T_by_dist.items(): + for g in T.index: + row = T.loc[g].to_numpy() + feasible_mask = (row <= max_minutes) & np.isfinite(row) + # Exclude self + feasible = [f for f in T.columns[feasible_mask] if f != g] + if g not in edges: + edges[g] = set() + edges[g].update(feasible) + return edges + +# a = build_edges_within_radius(T_car, max_minutes = 18) + +# ----------------------------------------------- +# 3) LP/MILP Redistribution +# ----------------------------------------------- +def redistribute_radius_lp( + df: pd.DataFrame, + time_matrix: pd.DataFrame, + radius_minutes: float, + tau_keep: float = 2.0, # Keeper minimum (2 --> Donors want to keep at least 2 X AMC) + tau_tar: float = 1.0, # Receiver target (1 --> Receivers want OB = AMC) + alpha: float = 0.7, # Donation cap (between 0 and 1) + # alpha can be set to 1 if donors only want to keep a min of tau_keep X AMC + K_in: int = 2, # max donors per receiver (per item) + K_out: int = 2, # max receivers per donor (per item) + # TODO the above limits should be across items not per item + Qmin_days: float = 7.0, # minimum lot size in "days of demand" at receiver + # i.e. 
at least Qmin_days worth of stock should be supplied for the transfer to be justifiable + id_cols=("district","month","item_code"), + facility_col="fac_name", + amc_col="amc", + close_cols=("closing_bal","received","dispensed"), +) -> pd.DataFrame: + """ + Scenario 1 or 2: local swaps within a time radius. + Presumed p' = min(1, OB'/AMC). + Objective: maximize total filled deficits (sum of transfers), which is a proxy for maximizing total availability. + + Returns a copy of df with a new column 'available_prop_redis' for redistributed probability per row. + """ + closing_bal, received, dispensed = close_cols + df = df.copy() + df["OB"] = compute_opening_balance(df) + + # container for updated OB by (district, month, item, facility) + df["OB_prime"] = df["OB"] + + # iterate per (district, month, item) + group_cols = list(id_cols) + for (d, m, i), g in df.groupby(group_cols): + # facilities in this slice + g = g.copy() + facs = g[facility_col].tolist() + + # build per-fac arrays + AMC = g.set_index(facility_col)[amc_col].astype(float) + OB0 = g.set_index(facility_col)["OB"].astype(float) + + # donor surplus cap and receiver deficit + surplus_cap = alpha * np.maximum(0.0, OB0.values - tau_keep * AMC.values) + deficit = np.maximum(0.0, tau_tar * AMC.values - OB0.values) + donors_mask = surplus_cap > 1e-9 # boolean vector marking facilities that actually have positive surplus + recv_mask = deficit > 1e-9 # boolean vector marking facilities that actually have a real need (deficit > 0). + + if not donors_mask.any() or not recv_mask.any(): + # nothing to do for this slice + continue + + donors = list(AMC.index[donors_mask]) + receivers = list(AMC.index[recv_mask]) + + # Feasible edges within radius + #feasible = build_edges_within_radius(time_matrix.loc[donors + receivers, donors + receivers], radius_minutes) + T_d = time_matrix[d] # time_matrix is dict {district -> DataFrame} + + # 2) limit to only facilities present in this slice + slice_facs = g[facility_col].unique().tolist() + T_d = T_d.loc[slice_facs, slice_facs] + + # 3) build feasible edges for this radius + feasible = {} + for g_fac in T_d.index: + feas_mask = (T_d.loc[g_fac].to_numpy() <= radius_minutes) & np.isfinite(T_d.loc[g_fac].to_numpy()) + feasible[g_fac] = set(T_d.columns[feas_mask]) - {g_fac} + + # Minimum lot (units) for each receiver depends on its AMC + AMC_map = AMC.to_dict() + Qmin_units = {f: Qmin_days * max(1e-9, AMC_map[f]) for f in receivers} + + # Big-M per edge: cannot exceed receiver deficit + deficit_map = dict(zip(AMC.index, deficit)) # creates a dict with key = facility and value = deficit + # this is the upper bound for how much a facility can receive + smax_map = dict(zip(AMC.index, surplus_cap)) # creates a dict with key = facility and value = donatable surplus + # this is the upper bound for how much a facility can donate + + # Build MILP + prob = LpProblem(f"Redistribution_{d}_{m}_{i}", LpMaximize) + + # Variables + t = {} # transfer amounts per (g,f): continuous >=0 + y = {} # edge-activation binaries + + for g_fac in donors: + for f_fac in feasible.get(g_fac, set()): + if f_fac in receivers: + t[(g_fac, f_fac)] = LpVariable(f"t_{g_fac}->{f_fac}", lowBound=0, cat=LpContinuous) + y[(g_fac, f_fac)] = LpVariable(f"y_{g_fac}->{f_fac}", lowBound=0, upBound=1, cat=LpBinary) + + # Objective: maximize total shipped (sum of t) + prob += lpSum(t.values()) + + # Donor outflow caps + for g_fac in donors: + prob += lpSum(t[(g_fac, f_fac)] for f_fac in receivers if (g_fac, f_fac) in t) <= smax_map[g_fac] + + # Receiver 
inflow caps + for f_fac in receivers: + prob += lpSum(t[(g_fac, f_fac)] for g_fac in donors if (g_fac, f_fac) in t) <= deficit_map[f_fac] + + # Edge linking + minimum lot size + K_in/K_out + for (g_fac, f_fac), var in t.items(): + M = deficit_map[f_fac] + prob += var <= M * y[(g_fac, f_fac)] + # minimum lot only makes sense if the edge is used at all; also don't force Qmin to exceed deficit + prob += var >= min(Qmin_units[f_fac], deficit_map[f_fac]) * y[(g_fac, f_fac)] + + # Max donors per receiver + for f_fac in receivers: + prob += lpSum(y[(g_fac, f_fac)] for g_fac in donors if (g_fac, f_fac) in y) <= K_in + + # Max receivers per donor + for g_fac in donors: + prob += lpSum(y[(g_fac, f_fac)] for f_fac in receivers if (g_fac, f_fac) in y) <= K_out + + # Solve (default solver) + prob.solve() + + # Apply transfers to OB' + if LpStatus[prob.status] not in ("Optimal","Optimal Infeasible","Not Solved"): + # If not solved (rare), skip changes + continue + + # net change per facility + delta = {fac: 0.0 for fac in facs} + for (g_fac, f_fac), var in t.items(): + moved = value(var) or 0.0 + delta[g_fac] -= moved + delta[f_fac] += moved + + # write back OB' + sel = (df["district"].eq(d) & df["month"].eq(m) & df["item_code"].eq(i)) + df.loc[sel, "OB_prime"] = df.loc[sel].apply( + lambda r: r["OB"] + delta.get(r[facility_col], 0.0), axis=1 + ) + + # Mechanistic availability after redistribution + df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(1e-9, df[amc_col]))) + return df + +def redistribute_pooling_lp( + df: pd.DataFrame, + tau_min: float = 0.25, # optional lower floor (this is the same as 7 days Q_min) + tau_max: float = 3.0, # storage/USAID style max + id_cols=("district","month","item_code"), + facility_col="fac_name", + amc_col="amc", + close_cols=("closing_bal","received","dispensed"), +) -> pd.DataFrame: + """ + Scenario 3: district-level pooling/push. Linear program maximizing total presumed availability. + + We linearize p = min(1, x/AMC) by introducing p_f in [0,1] with constraint AMC_f * p_f <= x_f. + + Returns df with 'available_prop_redis' updated for the pooling scenario. 
+ """ + closing_bal, received, dispensed = close_cols + df = df.copy() + df["OB"] = compute_opening_balance(df) + + df["OB_prime"] = df["OB"] + + group_cols = list(id_cols) + for (d, m, i), g in df.groupby(group_cols): + g = g.copy() + facs = g[facility_col].tolist() + AMC = g.set_index(facility_col)[amc_col].astype(float) + OB0 = g.set_index(facility_col)["OB"].astype(float) + + total_stock = float(OB0.sum()) + if total_stock <= 1e-9: + # nothing to allocate + continue + + # LP + prob = LpProblem(f"Pooling_{d}_{m}_{i}", LpMaximize) + + # Decision vars + x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs} # allocated opening stock + p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs} # availability + + # Objective: maximize sum p_f (optionally weight by item importance) + prob += lpSum(p.values()) + + # Conservation + prob += lpSum(x.values()) == total_stock + + # Guardrails (optional) + for f in facs: + prob += x[f] >= tau_min * AMC[f] + prob += x[f] <= tau_max * AMC[f] + + # Linearization: AMC_f * p_f <= x_f + for f in facs: + prob += AMC[f] * p[f] <= x[f] + + prob.solve() + + if LpStatus[prob.status] not in ("Optimal","Optimal Infeasible","Not Solved"): + continue + + # Write back OB' and p' + x_sol = {f: value(var) or 0.0 for f, var in x.items()} + # OB' equals allocated x + sel = (df["district"].eq(d) & df["month"].eq(m) & df["item_code"].eq(i)) + df.loc[sel, "OB_prime"] = df.loc[sel].apply(lambda r: x_sol.get(r[facility_col], r["OB"]), axis=1) + + # Mechanistic availability p' = min(1, OB'/AMC) + df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(1e-9, df[amc_col]))) + return df + +# ------------------------ +# 4) HIGH-LEVEL DRIVER +# ------------------------ +def run_redistribution_scenarios( + df: pd.DataFrame, + scenario: Literal["radius_30","radius_60","pooling"], + time_matrix: Optional[pd.DataFrame] = None, + # tunables + tau_keep: float = 2.0, + tau_tar: float = 1.0, + alpha: float = 0.7, + K_in: int = 2, + K_out: int = 2, + Qmin_days: float = 7.0, + tau_min: float = 0.75, + tau_max: float = 3.0, + # schema + id_cols=("district","month","item_code"), + facility_col="fac_name", + amc_col="amc", + close_cols=("closing_bal","received","dispensed"), + # aggregation for TLO input + aggregate_for_tlo: bool = True, + level_col: str = "Facility_Level" +) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Runs exactly one scenario and returns: + - detailed_df: row-level dataframe with 'available_prop_redis' + - tlo_df (optional): aggregated to (item_code, district, Facility_Level, month) with mean availability + + For radius scenarios, 'time_matrix' is required. For pooling, it is ignored. 
+ """ + if scenario in ("radius_30","radius_60"): + if time_matrix is None: + raise ValueError("time_matrix is required for radius-based scenarios.") + radius = 30 if scenario=="radius_30" else 60 + detailed = redistribute_radius_lp( + df=df, + time_matrix=time_matrix, + radius_minutes=radius, + tau_keep=tau_keep, + tau_tar=tau_tar, + alpha=alpha, + K_in=K_in, + K_out=K_out, + Qmin_days=Qmin_days, + id_cols=id_cols, + facility_col=facility_col, + amc_col=amc_col, + close_cols=close_cols, + ) + elif scenario == "pooling": + detailed = redistribute_pooling_lp( + df=df, + tau_min=tau_min, + tau_max=tau_max, + id_cols=id_cols, + facility_col=facility_col, + amc_col=amc_col, + close_cols=close_cols, + ) + else: + raise ValueError("scenario must be one of 'radius_30', 'radius_60', 'pooling'.") + + if not aggregate_for_tlo: + return detailed, None + + # Aggregate to TLO input grain: mean across facilities (match your current pipeline) + tlo = ( + detailed + .groupby(["item_code", "district", level_col, "month"], as_index=False) + .agg(new_available_prop=("available_prop_redis", "mean")) + .sort_values(["item_code","district",level_col,"month"]) + ) + return tlo + +# IMPLEMENT +# 1) Build a time matrix +fac_coords = lmis[['fac_name', 'district', 'lat','long']] +#T_car = build_time_matrices_by_district( +# fac_coords, +# mode="car", +# backend="osrm", +# osrm_base_url="https://router.project-osrm.org", +# max_chunk=50) + +# Store dictionary in pickle format +#with open(outputfilepath / "T_car.pkl", "wb") as f: +# pickle.dump(T_car, f) +# -> Commented out because it takes long to run. The result has been stored in pickle format + +# Load pre-generated dictionary +with open(outputfilepath / "T_car.pkl", "rb") as f: + T_car = pickle.load(f) + +#edges_flat = build_edges_within_radius_flat(T_car, max_minutes= 60) + +# Browse the number of eligible neighbours depending on allowable travel time +results = [] +for mins in [30, 60, 90, 120]: + edges_flat = build_edges_within_radius_flat(T_car, max_minutes=mins) + neighbors_count = pd.Series({fac: len(neigh) for fac, neigh in edges_flat.items()}) + mean = neighbors_count.mean() + sem = neighbors_count.sem() # standard error of mean + ci95 = 1.96 * sem + results.append({"radius": mins, "mean": mean, "ci95": ci95}) + +results_df = pd.DataFrame(results) + +# Plot +plt.figure(figsize=(6,4)) +plt.bar(results_df["radius"], results_df["mean"], yerr=results_df["ci95"], capsize=5, color="skyblue") +plt.xlabel("Travel time radius (minutes)") +plt.ylabel("Average number of facilities within radius") +plt.title("Average connectivity of facilities with 95% CI") +plt.xticks(results_df["radius"]) +plt.savefig(outputfilepath / "neighbour_count_by_max_travel_time") + +# A manual check shows that for distances greater than 60 minutes ORS underestimates the travel time a little +# TODO consider using google maps API + +# 2) Run Scenario 1 (1-hour radius) + +#Drop NAs +# TODO find a more generalisable solution for this issue (within the optimisation functions) +lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())] + +tlo_30 = run_redistribution_scenarios( + lmis, + scenario="radius_30", + time_matrix=T_car, + tau_keep=2.0, tau_tar=1.0, alpha=1, + K_in=2, K_out=2, Qmin_days=7.0, +) + +tlo_60 = run_redistribution_scenarios( + lmis, + scenario="radius_60", + time_matrix=T_car, + tau_keep=2.0, tau_tar=1.0, alpha=1, + K_in=2, K_out=2, Qmin_days=7.0, +) + +tlo_pooling = run_redistribution_scenarios( + lmis, + scenario="pooling", + tau_min=0.75, tau_max=3.0 +) + +tlo_availability_df = 
pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_small.csv")
+# Drop any scenario data previously included in the resourcefile
+tlo_availability_df = tlo_availability_df[['Facility_ID', 'month','item_code', 'available_prop']]
+
+# Import item_category
+program_item_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_Consumables_Item_Designations.csv')[['Item_Code', 'item_category']]
+program_item_mapping = program_item_mapping.rename(columns ={'Item_Code': 'item_code'})[program_item_mapping.item_category.notna()]
+
+# Attach district, facility level and item_category to this dataset
+#----------------------------------------------------------------
+# Get the TLO Facility_ID for each district and facility level
+mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv")
+districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District'])  # reference set; not used below
+fac_levels = {'0', '1a', '1b', '2', '3', '4'}  # reference set; not used below
+tlo_availability_df = tlo_availability_df.merge(mfl[['District', 'Facility_Level', 'Facility_ID']],
+                    on = ['Facility_ID'], how='left')
+
+tlo_availability_df = tlo_availability_df.merge(program_item_mapping,
+                    on = ['item_code'], how='left')
+tlo_availability_df = tlo_availability_df[~tlo_availability_df[['District', 'Facility_Level', 'month', 'item_code']].duplicated()]
+
+
+comparison_df = tlo_30.merge(tlo_availability_df, left_on = ['district', 'Facility_Level', 'month', 'item_code'],
+                    right_on = ['District', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1")
+comparison_df = comparison_df.merge(tlo_60, on = ['district', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1")
+print(comparison_df['new_available_prop_x'].mean(),comparison_df['new_available_prop_y'].mean(),
+comparison_df['available_prop'].mean())
+
+comparison_df['new_available_prop_x'] = np.maximum(
+    comparison_df['new_available_prop_x'],
+    comparison_df['available_prop']
+)
+comparison_df['new_available_prop_y'] = np.maximum(
+    comparison_df['new_available_prop_y'],
+    comparison_df['available_prop']
+)
+
+print(comparison_df['new_available_prop_x'].mean(),comparison_df['new_available_prop_y'].mean(),
+comparison_df['available_prop'].mean())
+
+# TODO keep only government facilities?
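+# A minimal sketch of that government-only filter (hypothetical; the exact
+# ownership labels depend on lmis_facility_ownership.csv):
+# lmis_govt = lmis[lmis['fac_owner'] == 'Government']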
+ From 797ba66763063ce320ddff8eeaab47f65ce63691 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 11 Sep 2025 10:01:01 +0700 Subject: [PATCH 04/16] make the radius redistribution more efficient --- ...ate_consumable_redistribution_scenarios.py | 356 ++++++++++++------ 1 file changed, 243 insertions(+), 113 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index cd3680556d..a7604868f3 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -9,7 +9,8 @@ from typing import Literal, Optional, Dict, Tuple import requests -from pulp import LpProblem, LpMaximize, LpVariable, LpBinary, LpStatus, value, lpSum, LpContinuous +from pulp import LpProblem, LpMaximize, LpVariable, LpBinary, LpStatus, value, lpSum, LpContinuous, PULP_CBC_CMD + # define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -331,51 +332,93 @@ def redistribute_radius_lp( # iterate per (district, month, item) group_cols = list(id_cols) + EPS = 1e-9 + EPS_AMC = 1e-6 + for (d, m, i), g in df.groupby(group_cols): # facilities in this slice g = g.copy() - facs = g[facility_col].tolist() - # build per-fac arrays - AMC = g.set_index(facility_col)[amc_col].astype(float) - OB0 = g.set_index(facility_col)["OB"].astype(float) + # ---------- (1) Select the correct time matrix for this district / slice ---------- + if isinstance(time_matrix, dict): + T_d = time_matrix.get(d) + if T_d is None or T_d.empty: + continue + else: + T_d = time_matrix + + # facilities present in this slice AND in the matrix index/cols + slice_facs = g[facility_col].dropna().unique().tolist() + present = [f for f in slice_facs if (f in T_d.index and f in T_d.columns)] + if len(present) < 2: + continue + + # subset and sanitize travel-times (NaN -> inf) + # This is to make sure that the redistribution LP is run only for those pairs for which a travel time could be calculated + T_sub = T_d.loc[present, present].replace(np.nan, np.inf) + # ---------- (2) Clean inputs (AMC, OB) ---------- + # build per-fac arrays + AMC = (g.set_index(facility_col)[amc_col] + .astype(float) + .replace([np.inf, -np.inf], np.nan) + .fillna(0.0)) + AMC[AMC <= 0.0] = EPS_AMC # avoid zero AMC + + OB0 = (g.set_index(facility_col)["OB"] + .astype(float) + .replace([np.inf, -np.inf], np.nan) + .fillna(0.0)) + + # keep only present facilities + AMC = AMC.reindex(present).fillna(EPS_AMC) + OB0 = OB0.reindex(present).fillna(0.0) + + # ---------- (3) Surplus/deficit with finite guards ---------- # donor surplus cap and receiver deficit surplus_cap = alpha * np.maximum(0.0, OB0.values - tau_keep * AMC.values) - deficit = np.maximum(0.0, tau_tar * AMC.values - OB0.values) - donors_mask = surplus_cap > 1e-9 # boolean vector marking facilities that actually have positive surplus - recv_mask = deficit > 1e-9 # boolean vector marking facilities that actually have a real need (deficit > 0). 
+ deficit = np.maximum(0.0, tau_tar * AMC.values - OB0.values) + surplus_cap = np.where(np.isfinite(surplus_cap), surplus_cap, 0.0) + deficit = np.where(np.isfinite(deficit), deficit, 0.0) + donors_mask = surplus_cap > EPS + recv_mask = deficit > EPS if not donors_mask.any() or not recv_mask.any(): - # nothing to do for this slice continue donors = list(AMC.index[donors_mask]) receivers = list(AMC.index[recv_mask]) - # Feasible edges within radius - #feasible = build_edges_within_radius(time_matrix.loc[donors + receivers, donors + receivers], radius_minutes) - T_d = time_matrix[d] # time_matrix is dict {district -> DataFrame} - - # 2) limit to only facilities present in this slice - slice_facs = g[facility_col].unique().tolist() - T_d = T_d.loc[slice_facs, slice_facs] + smax_map = {fac: float(x) for fac, x in zip(AMC.index, surplus_cap)} # creates a dict with key = facility and value = donatable surplus + # this is the upper bound for how much a facility can donate + deficit_map = {fac: float(x) for fac, x in zip(AMC.index, deficit)} # creates a dict with key = facility and value = deficit + # this is the upper bound for how much a facility can receive - # 3) build feasible edges for this radius + # ---------- (4) Build feasible edges within radius (finite & <= radius) ---------- feasible = {} - for g_fac in T_d.index: - feas_mask = (T_d.loc[g_fac].to_numpy() <= radius_minutes) & np.isfinite(T_d.loc[g_fac].to_numpy()) - feasible[g_fac] = set(T_d.columns[feas_mask]) - {g_fac} - - # Minimum lot (units) for each receiver depends on its AMC - AMC_map = AMC.to_dict() - Qmin_units = {f: Qmin_days * max(1e-9, AMC_map[f]) for f in receivers} + for g_fac in present: + row = T_sub.loc[g_fac].to_numpy() + mask = (row <= radius_minutes) & np.isfinite(row) + feas = set(T_sub.columns[mask]) - {g_fac} + feasible[g_fac] = feas + + # ---------- (5) Edge set with tight M_edge and min-lot per receiver ---------- + Qmin_units = {f: float(max(Qmin_days * AMC[f], 0.0)) for f in present} + M_edge = {} + for g_fac in donors: + smax = smax_map.get(g_fac, 0.0) + if smax <= EPS: + continue + for f_fac in feasible.get(g_fac, set()): + if f_fac not in receivers: + continue + h = deficit_map.get(f_fac, 0.0) + M = min(h, smax) + if np.isfinite(M) and M > EPS: + M_edge[(g_fac, f_fac)] = M - # Big-M per edge: cannot exceed receiver deficit - deficit_map = dict(zip(AMC.index, deficit)) # creates a dict with key = facility and value = deficit - # this is the upper bound for how much a facility can receive - smax_map = dict(zip(AMC.index, surplus_cap)) # creates a dict with key = facility and value = donatable surplus - # this is the upper bound for how much a facility can donate + if not M_edge: + continue # nothing feasible to move # Build MILP prob = LpProblem(f"Redistribution_{d}_{m}_{i}", LpMaximize) @@ -383,134 +426,215 @@ def redistribute_radius_lp( # Variables t = {} # transfer amounts per (g,f): continuous >=0 y = {} # edge-activation binaries + for (g_fac, f_fac), M in M_edge.items(): + t[(g_fac, f_fac)] = LpVariable(f"t_{g_fac}->{f_fac}", lowBound=0, upBound=M, cat=LpContinuous) + y[(g_fac, f_fac)] = LpVariable(f"y_{g_fac}->{f_fac}", lowBound=0, upBound=1, cat=LpBinary) - for g_fac in donors: - for f_fac in feasible.get(g_fac, set()): - if f_fac in receivers: - t[(g_fac, f_fac)] = LpVariable(f"t_{g_fac}->{f_fac}", lowBound=0, cat=LpContinuous) - y[(g_fac, f_fac)] = LpVariable(f"y_{g_fac}->{f_fac}", lowBound=0, upBound=1, cat=LpBinary) - - # Objective: maximize total shipped (sum of t) + # objective: maximize total 
shipped (proxy for availability gain) prob += lpSum(t.values()) - # Donor outflow caps + # donor outflow caps for g_fac in donors: - prob += lpSum(t[(g_fac, f_fac)] for f_fac in receivers if (g_fac, f_fac) in t) <= smax_map[g_fac] + prob += lpSum(t[(g_fac, f_fac)] for f_fac in receivers if (g_fac, f_fac) in t) <= smax_map.get(g_fac, 0.0) - # Receiver inflow caps + # receiver inflow caps for f_fac in receivers: - prob += lpSum(t[(g_fac, f_fac)] for g_fac in donors if (g_fac, f_fac) in t) <= deficit_map[f_fac] + prob += lpSum(t[(g_fac, f_fac)] for g_fac in donors if (g_fac, f_fac) in t) <= deficit_map.get(f_fac, 0.0) - # Edge linking + minimum lot size + K_in/K_out - for (g_fac, f_fac), var in t.items(): - M = deficit_map[f_fac] - prob += var <= M * y[(g_fac, f_fac)] - # minimum lot only makes sense if the edge is used at all; also don't force Qmin to exceed deficit - prob += var >= min(Qmin_units[f_fac], deficit_map[f_fac]) * y[(g_fac, f_fac)] + # link t and y; enforce min-lot clipped by edge capacity + for (g_fac, f_fac), M in M_edge.items(): + prob += t[(g_fac, f_fac)] <= M * y[(g_fac, f_fac)] + qmin = min(Qmin_units.get(f_fac, 0.0), M) + # if qmin is basically zero, don't force a min lot + if qmin > EPS: + prob += t[(g_fac, f_fac)] >= qmin * y[(g_fac, f_fac)] - # Max donors per receiver + # cardinality limits for f_fac in receivers: prob += lpSum(y[(g_fac, f_fac)] for g_fac in donors if (g_fac, f_fac) in y) <= K_in - - # Max receivers per donor for g_fac in donors: prob += lpSum(y[(g_fac, f_fac)] for f_fac in receivers if (g_fac, f_fac) in y) <= K_out - # Solve (default solver) - prob.solve() + # solve + prob.solve(PULP_CBC_CMD(msg=False, cuts=0, presolve=True, threads=1)) - # Apply transfers to OB' - if LpStatus[prob.status] not in ("Optimal","Optimal Infeasible","Not Solved"): - # If not solved (rare), skip changes + if LpStatus[prob.status] not in ("Optimal", "Optimal Infeasible", "Not Solved"): continue - # net change per facility - delta = {fac: 0.0 for fac in facs} + # ---------- (7) Apply transfers to OB' ---------- + delta = {fac: 0.0 for fac in present} for (g_fac, f_fac), var in t.items(): - moved = value(var) or 0.0 - delta[g_fac] -= moved - delta[f_fac] += moved + moved = float(value(var) or 0.0) + if moved: + delta[g_fac] -= moved + delta[f_fac] += moved - # write back OB' sel = (df["district"].eq(d) & df["month"].eq(m) & df["item_code"].eq(i)) df.loc[sel, "OB_prime"] = df.loc[sel].apply( - lambda r: r["OB"] + delta.get(r[facility_col], 0.0), axis=1 + lambda r: (r["OB"] + delta.get(r[facility_col], 0.0)), axis=1 ) - # Mechanistic availability after redistribution - df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(1e-9, df[amc_col]))) + print(d, m, i, + "donors:", len(donors), + "receivers:", len(receivers), + "edges:", len(M_edge)) + + # ---------- (8) Mechanistic availability after redistribution ---------- + df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(EPS_AMC, + df[amc_col].astype( + float).values))) return df def redistribute_pooling_lp( df: pd.DataFrame, - tau_min: float = 0.25, # optional lower floor (this is the same as 7 days Q_min) - tau_max: float = 3.0, # storage/USAID style max + tau_min: float = 0.25, # lower floor in "months of demand" (≈ 7–8 days) + tau_max: float = 3.0, # upper ceiling (storage/policy max) id_cols=("district","month","item_code"), facility_col="fac_name", amc_col="amc", close_cols=("closing_bal","received","dispensed"), + keep_baseline_for_amc0: bool = True, # 
leave baseline availability where AMC≈0 + amc_eps: float = 1e-6 # threshold to treat AMC as "zero" ) -> pd.DataFrame: """ - Scenario 3: district-level pooling/push. Linear program maximizing total presumed availability. - - We linearize p = min(1, x/AMC) by introducing p_f in [0,1] with constraint AMC_f * p_f <= x_f. - - Returns df with 'available_prop_redis' updated for the pooling scenario. + Scenario 3: district-level pooling/push (robust). + Maximizes total availability with: + - NaN/inf guards on AMC/OB + - duplicate facility IDs collapsed within group + - floors scaled if total stock < sum floors + - optional 'excess' sink if total stock > sum ceilings + - availability computed safely; AMC≈0 rows keep baseline (optional) """ closing_bal, received, dispensed = close_cols - df = df.copy() - df["OB"] = compute_opening_balance(df) + out = df.copy() - df["OB_prime"] = df["OB"] + # Safe opening balance + out["OB"] = ( + out[closing_bal].astype(float) + - out[received].astype(float) + + out[dispensed].astype(float) + ).replace([np.inf, -np.inf], np.nan).fillna(0.0) + + # Default (will overwrite per group) + out["OB_prime"] = out["OB"] group_cols = list(id_cols) - for (d, m, i), g in df.groupby(group_cols): + + for (d, m, i), g in out.groupby(group_cols, sort=False): g = g.copy() - facs = g[facility_col].tolist() - AMC = g.set_index(facility_col)[amc_col].astype(float) - OB0 = g.set_index(facility_col)["OB"].astype(float) + # Clean AMC/OB and collapse duplicate facilities (unique index is required) + AMC = (g.set_index(facility_col)[amc_col] + .astype(float) + .replace([np.inf, -np.inf], np.nan) + .fillna(0.0)) + OB0 = (g.set_index(facility_col)["OB"] + .astype(float) + .replace([np.inf, -np.inf], np.nan) + .fillna(0.0)) + + if AMC.index.duplicated().any(): + AMC = AMC.groupby(level=0).first() + if OB0.index.duplicated().any(): + OB0 = OB0.groupby(level=0).sum() + + facs_all = AMC.index.tolist() total_stock = float(OB0.sum()) if total_stock <= 1e-9: - # nothing to allocate continue - # LP + # Positive-demand decision set + mask_pos = AMC >= amc_eps + facs_pos = AMC.index[mask_pos].tolist() + if len(facs_pos) == 0: + # No demand nodes to allocate to; leave OB as-is (or zero them if you prefer) + continue + + AMC_pos = AMC.loc[facs_pos] + # Floors/ceilings + lb = (tau_min * AMC_pos).astype(float) + ub = (tau_max * AMC_pos).astype(float) + + sum_lb = float(lb.sum()) + sum_ub = float(ub.sum()) + + # Feasibility: scale floors if not enough stock + if total_stock + 1e-9 < sum_lb: + scale = max(1e-9, total_stock / max(1e-9, sum_lb)) + lb = lb * scale + sum_lb = float(lb.sum()) + + # If stock exceeds ceilings, allow an excess sink to absorb leftovers + allow_excess_sink = total_stock > sum_ub + 1e-9 + + # ---------- LP ---------- prob = LpProblem(f"Pooling_{d}_{m}_{i}", LpMaximize) - # Decision vars - x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs} # allocated opening stock - p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs} # availability + # Decision variables + x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos} # allocation + p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos} # availability + + excess = LpVariable("excess", lowBound=0) if allow_excess_sink else None - # Objective: maximize sum p_f (optionally weight by item importance) + # Objective: maximize total availability prob += lpSum(p.values()) # Conservation - prob += lpSum(x.values()) == total_stock + if excess is None: + prob += lpSum(x.values()) == total_stock + else: + prob += 
lpSum(x.values()) + excess == total_stock - # Guardrails (optional) - for f in facs: - prob += x[f] >= tau_min * AMC[f] - prob += x[f] <= tau_max * AMC[f] + # Floors/ceilings (use scalar values to avoid float(Series) errors) + for f in facs_pos: + prob += x[f] >= float(lb.loc[f]) + prob += x[f] <= float(ub.loc[f]) - # Linearization: AMC_f * p_f <= x_f - for f in facs: - prob += AMC[f] * p[f] <= x[f] + # Linearization: AMC_f * p_f <= x_f (only for AMC>=amc_eps) + for f in facs_pos: + prob += float(AMC_pos.loc[f]) * p[f] <= x[f] + # Solve prob.solve() - - if LpStatus[prob.status] not in ("Optimal","Optimal Infeasible","Not Solved"): + if LpStatus[prob.status] != "Optimal": continue - # Write back OB' and p' - x_sol = {f: value(var) or 0.0 for f, var in x.items()} - # OB' equals allocated x - sel = (df["district"].eq(d) & df["month"].eq(m) & df["item_code"].eq(i)) - df.loc[sel, "OB_prime"] = df.loc[sel].apply(lambda r: x_sol.get(r[facility_col], r["OB"]), axis=1) + # Apply solution + x_sol = {f: float(value(var) or 0.0) for f, var in x.items()} + + sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i)) + # Map allocation to rows with positive demand + mask_rows_pos = sel & out[facility_col].isin(facs_pos) + out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values + + # Rows with AMC= amc_eps + + p_mech = np.zeros(len(out), dtype=float) + # Mechanistic availability for positive-demand rows + denom = np.maximum(amc_eps, amc_safe[pos_mask].to_numpy()) + p_mech[pos_mask.values] = np.minimum(1.0, np.maximum( + 0.0, out.loc[pos_mask, "OB_prime"].to_numpy() / denom + )) + + if keep_baseline_for_amc0: + # Keep baseline availability where AMC≈0 + base = (out.loc[~pos_mask, "available_prop"] + .astype(float) + .replace([np.inf, -np.inf], np.nan) + .fillna(0.0)).to_numpy() + p_mech[~pos_mask.values] = base + else: + # Or: 1 if OB'>0 else 0 for AMC≈0 rows + p_mech[~pos_mask.values] = (out.loc[~pos_mask, "OB_prime"].to_numpy() > 0.0).astype(float) - # Mechanistic availability p' = min(1, OB'/AMC) - df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(1e-9, df[amc_col]))) - return df + out["available_prop_redis"] = p_mech + return out # ------------------------ # 4) HIGH-LEVEL DRIVER @@ -637,7 +761,9 @@ def run_redistribution_scenarios( #Drop NAs # TODO find a more generalisable solution for this issue (within the optimisation functions) -lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())] +#lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())] + +#lmis = lmis[lmis.district == 'Lilongwe'] tlo_30 = run_redistribution_scenarios( lmis, @@ -683,23 +809,27 @@ def run_redistribution_scenarios( tlo_availability_df = tlo_availability_df[~tlo_availability_df[['District', 'Facility_Level', 'month', 'item_code']].duplicated()] -comparison_df = tlo_30.merge(tlo_availability_df, left_on = ['district', 'Facility_Level', 'month', 'item_code'], +comparison_df = tlo_30.rename(columns ={'new_available_prop': 'available_prop_30mins'}).merge(tlo_availability_df, left_on = ['district', 'Facility_Level', 'month', 'item_code'], right_on = ['District', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1") -comparison_df = comparison_df.merge(tlo_60, on = ['district', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1") -print(comparison_df['new_available_prop_x'].mean(),comparison_df['new_available_prop_y'].mean(), -comparison_df['available_prop'].mean()) +comparison_df = 
comparison_df.merge(tlo_60.rename(columns ={'new_available_prop': 'available_prop_60mins'}), on = ['district', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1") +comparison_df = comparison_df.merge(tlo_pooling.rename(columns ={'new_available_prop': 'available_prop_pooling'}), on = ['district', 'Facility_Level', 'month', 'item_code'], how = 'left', validate = "1:1") +print(comparison_df['available_prop'].mean(),comparison_df['available_prop_30mins'].mean(), +comparison_df['available_prop_60mins'].mean(), comparison_df['available_prop_pooling'].mean()) -comparison_df['new_available_prop_x'] = np.maximum( - comparison_df['new_available_prop_x'], +comparison_df['available_prop_30mins'] = np.maximum( + comparison_df['available_prop_30mins'], comparison_df['available_prop'] ) -comparison_df['new_available_prop_y'] = np.maximum( - comparison_df['new_available_prop_y'], +comparison_df['available_prop_60mins'] = np.maximum( + comparison_df['available_prop_60mins'], comparison_df['available_prop'] ) - -print(comparison_df['new_available_prop_x'].mean(),comparison_df['new_available_prop_y'].mean(), -comparison_df['available_prop'].mean()) +comparison_df['available_prop_pooling'] = np.maximum( + comparison_df['available_prop_pooling'], + comparison_df['available_prop'] +) +print(comparison_df['available_prop'].mean(),comparison_df['available_prop_30mins'].mean(), +comparison_df['available_prop_60mins'].mean(), comparison_df['available_prop_pooling'].mean()) # TODO keep only government facilities? From 50720616c7abc3e91e42e0d914bb94a69f4c42de Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 24 Sep 2025 18:33:46 +0100 Subject: [PATCH 05/16] Updated to MILP functions - - get detailed logs of transfers --- ...ate_consumable_redistribution_scenarios.py | 109 +++++++++++++----- 1 file changed, 82 insertions(+), 27 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index a7604868f3..0f58c4d1c2 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -315,6 +315,7 @@ def redistribute_radius_lp( facility_col="fac_name", amc_col="amc", close_cols=("closing_bal","received","dispensed"), + return_edge_log: bool = True, ) -> pd.DataFrame: """ Scenario 1 or 2: local swaps within a time radius. @@ -324,18 +325,18 @@ def redistribute_radius_lp( Returns a copy of df with a new column 'available_prop_redis' for redistributed probability per row. 
""" closing_bal, received, dispensed = close_cols - df = df.copy() - df["OB"] = compute_opening_balance(df) - - # container for updated OB by (district, month, item, facility) - df["OB_prime"] = df["OB"] + out = df.copy() + out["OB"] = compute_opening_balance(out).replace([np.inf, -np.inf], np.nan).fillna(0.0) + out["OB_prime"] = out["OB"] # container for updated OB by (district, month, item, facility) # iterate per (district, month, item) group_cols = list(id_cols) EPS = 1e-9 EPS_AMC = 1e-6 - for (d, m, i), g in df.groupby(group_cols): + edge_logs = [] # will log dicts with district, month, item_code, donor, receiver, units_moved, travel_minutes + + for (d, m, i), g in out.groupby(group_cols, sort=False): # facilities in this slice g = g.copy() @@ -461,16 +462,25 @@ def redistribute_radius_lp( if LpStatus[prob.status] not in ("Optimal", "Optimal Infeasible", "Not Solved"): continue - # ---------- (7) Apply transfers to OB' ---------- + # apply transfers & LOG delta = {fac: 0.0 for fac in present} for (g_fac, f_fac), var in t.items(): moved = float(value(var) or 0.0) - if moved: + if moved > EPS_AMC: delta[g_fac] -= moved delta[f_fac] += moved - - sel = (df["district"].eq(d) & df["month"].eq(m) & df["item_code"].eq(i)) - df.loc[sel, "OB_prime"] = df.loc[sel].apply( + if return_edge_log: + edge_logs.append({ + "district": d, "month": m, "item_code": i, + "donor_fac": g_fac, "receiver_fac": f_fac, + "units_moved": moved, + "travel_minutes": float(T_sub.loc[g_fac, f_fac]) if np.isfinite( + T_sub.loc[g_fac, f_fac]) else np.nan + }) + + # write OB' + sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i)) + out.loc[sel, "OB_prime"] = out.loc[sel].apply( lambda r: (r["OB"] + delta.get(r[facility_col], 0.0)), axis=1 ) @@ -480,30 +490,44 @@ def redistribute_radius_lp( "edges:", len(M_edge)) # ---------- (8) Mechanistic availability after redistribution ---------- - df["available_prop_redis"] = np.minimum(1.0, np.maximum(0.0, df["OB_prime"] / np.maximum(EPS_AMC, - df[amc_col].astype( - float).values))) - return df + out["available_prop_redis"] = np.minimum( + 1.0, + np.maximum(0.0, out["OB_prime"] / np.maximum(EPS_AMC, out[amc_col].astype(float).values)) + ) + + # build log DataFrames + edge_log_df = pd.DataFrame(edge_logs, + columns=["district", "month", "item_code", "donor_fac", "receiver_fac", "units_moved", + "travel_minutes"]) if return_edge_log else None + + return out, edge_log_df def redistribute_pooling_lp( df: pd.DataFrame, - tau_min: float = 0.25, # lower floor in "months of demand" (≈ 7–8 days) + tau_min: float = 0.25, # lower floor in "months of demand" (≈ 7–8 days) - minimum transfer required tau_max: float = 3.0, # upper ceiling (storage/policy max) id_cols=("district","month","item_code"), facility_col="fac_name", amc_col="amc", close_cols=("closing_bal","received","dispensed"), keep_baseline_for_amc0: bool = True, # leave baseline availability where AMC≈0 - amc_eps: float = 1e-6 # threshold to treat AMC as "zero" + amc_eps: float = 1e-6, # threshold to treat AMC as "zero" + return_move_log: bool = True # return a detailed df showing net movement of consumables after redistribution ) -> pd.DataFrame: """ - Scenario 3: district-level pooling/push (robust). + Scenario 3: district-level pooling/push . 
Maximizes total availability with:
      - NaN/inf guards on AMC/OB
      - duplicate facility IDs collapsed within group
      - floors scaled if total stock < sum floors
      - optional 'excess' sink if total stock > sum ceilings
      - availability computed safely; AMC≈0 rows keep baseline (optional)
+
+    Returns:
+      - out: original df plus columns:
+          OB, OB_prime, available_prop_redis, received_from_pool
+        where received_from_pool = OB_prime - OB (positive = received, negative = donated)
+      - (optional) move_log: per (district, month, item, facility) net movement summary
     """
     closing_bal, received, dispensed = close_cols
     out = df.copy()
@@ -519,6 +543,7 @@ def redistribute_pooling_lp(
     out["OB_prime"] = out["OB"]
 
     group_cols = list(id_cols)
+    move_rows = []  # optional movement log
 
     for (d, m, i), g in out.groupby(group_cols, sort=False):
         g = g.copy()
@@ -571,13 +596,13 @@ def redistribute_pooling_lp(
         prob = LpProblem(f"Pooling_{d}_{m}_{i}", LpMaximize)
 
         # Decision variables
-        x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos}   # allocation
-        p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos}  # availability
+        x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos}   # how much stock to allocate to facility f
+        p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos}  # availability probability proxy for facility f
 
         excess = LpVariable("excess", lowBound=0) if allow_excess_sink else None
 
         # Objective: maximize total availability
-        prob += lpSum(p.values())
+        prob += lpSum(p.values())  # prioritize broad coverage: get as many facilities as possible to have stock
 
         # Conservation
         if excess is None:
@@ -590,7 +615,7 @@ def redistribute_pooling_lp(
             prob += x[f] >= float(lb.loc[f])
             prob += x[f] <= float(ub.loc[f])
 
-        # Linearization: AMC_f * p_f <= x_f (only for AMC>=amc_eps)
+        # Linearization: AMC_f * p_f <= x_f (only for AMC>=amc_eps), i.e. p <= x/AMC
         for f in facs_pos:
             prob += float(AMC_pos.loc[f]) * p[f] <= x[f]
 
@@ -602,20 +627,36 @@ def redistribute_pooling_lp(
         # Apply solution
         x_sol = {f: float(value(var) or 0.0) for f, var in x.items()}
 
-        sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i))
+        sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i))  # selection mask:
+        # boolean filter that picks out the subset of rows in out corresponding to the current group (district, month, item)
         # Map allocation to rows with positive demand
-        mask_rows_pos = sel & out[facility_col].isin(facs_pos)
-        out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values
+        mask_rows_pos = sel & out[facility_col].isin(facs_pos)  # only those facilities with a positive demand/AMC
+        out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values  # assign new opening balance
 
         # Rows with AMC < amc_eps: set OB' = 0 (their stock is donated to the pool)
         mask_rows_zero = sel & ~out[facility_col].isin(facs_pos)
         out.loc[mask_rows_zero, "OB_prime"] = 0.0
 
     # ---------- Availability after redistribution (safe) ----------
     amc_safe = out[amc_col].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)
     pos_mask = amc_safe >= amc_eps
 
-    p_mech = np.zeros(len(out), dtype=float)
+    p_mech = np.zeros(len(out), dtype=float)  # output array for the new availability (available_prop_redis), initialized to 0 for all rows
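+    # Hedged worked example of the mechanistic formula applied below (synthetic numbers):
+    # p' = min(1, max(0, OB'/AMC)); OB' = 6 with AMC = 4 gives 1.0 (capped),
+    # OB' = 2 with AMC = 4 gives 0.5, and a negative OB' is clipped to 0.0.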
# Mechanistic availability for positive-demand rows denom = np.maximum(amc_eps, amc_safe[pos_mask].to_numpy()) p_mech[pos_mask.values] = np.minimum(1.0, np.maximum( @@ -634,8 +675,21 @@ def redistribute_pooling_lp( p_mech[~pos_mask.values] = (out.loc[~pos_mask, "OB_prime"].to_numpy() > 0.0).astype(float) out["available_prop_redis"] = p_mech + # NEW: per-row received_from_pool (using each row's OB) + out["received_from_pool"] = out["OB_prime"] - out["OB"] + + if return_move_log: + move_log = pd.DataFrame( + move_rows, + columns=["district", "month", "item_code", "facility", "received_from_pool", "x_allocated", "OB0_agg"] + ) + return out, move_log + return out +# pooled_df, pool_moves = redistribute_pooling_lp(lmis, tau_min=0.25, tau_max=3.0, return_move_log=True) + + # ------------------------ # 4) HIGH-LEVEL DRIVER # ------------------------ @@ -696,6 +750,7 @@ def run_redistribution_scenarios( facility_col=facility_col, amc_col=amc_col, close_cols=close_cols, + return_move_log=False, ) else: raise ValueError("scenario must be one of 'radius_30', 'radius_60', 'pooling'.") From ed042146f010bae93392989277b859dc1affba68 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 25 Sep 2025 15:05:56 +0100 Subject: [PATCH 06/16] Allow for resource pooling at a lower level than district --- ...ate_consumable_redistribution_scenarios.py | 284 +++++++++++++----- 1 file changed, 217 insertions(+), 67 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 0f58c4d1c2..9b0feeafa5 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -10,7 +10,7 @@ import requests from pulp import LpProblem, LpMaximize, LpVariable, LpBinary, LpStatus, value, lpSum, LpContinuous, PULP_CBC_CMD - +from math import ceil # define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -295,6 +295,127 @@ def build_edges_within_radius_flat(T_by_dist: dict, max_minutes: float) -> dict[ # a = build_edges_within_radius(T_car, max_minutes = 18) +# Defining clusters of health facilities within district +# This function helps find the facilities which would be appropriate cluster centers +def _farthest_first_seeds(T: pd.DataFrame, k: int, big: float = 1e9) -> list: + """ + Pick k seed medoids via farthest-first traversal on a travel-time matrix. + Treat inf/NaN distances as 'big' so disconnected components get separate seeds. 
+ """ + n = T.shape[0] + facs = T.index.tolist() + D = T.to_numpy().astype(float) + D[~np.isfinite(D)] = big + + # Start at the row with largest average distance (covers sparse areas first) + start = int(np.nanargmax(np.nanmean(D, axis=1))) # the remotest facility + seeds_idx = [start] + + # Iteratively add the point with max distance to its nearest seed + for _ in range(1, k): + # min distance to any existing seed for every point + min_to_seed = np.min(D[:, seeds_idx], axis=1) # this has a dimension of [facs, number of seeds] + next_idx = int(np.argmax(min_to_seed)) # for each facility find the distance to its nearest seed + # and the facility farthest from the nearest seed becomes the next seed + if next_idx in seeds_idx: + # Fallback: pick any non-seed with highest min_to_seed + candidates = [i for i in range(n) if i not in seeds_idx] + if not candidates: + break + next_idx = int(candidates[np.argmax(min_to_seed[candidates])]) + seeds_idx.append(next_idx) + + return [facs[i] for i in seeds_idx] # list of length k representing the clustering points + +# Assign each facility to its nearest seed subject to a hard cluster capacity (≤ X members) +def _assign_to_cluster_with_fixed_capacity(T: pd.DataFrame, seeds: list, capacity: int, big: float = 1e9) -> Dict[str, int]: + """ + Greedy assignment of facilities to nearest seed that still has capacity. + Returns: mapping facility -> seed_index (position in seeds list). + """ + facs = T.index.tolist() + D = T.loc[facs, seeds].to_numpy().astype(float) # Distance of all facilities from the k seeds + D[~np.isfinite(D)] = big + + # each facility: nearest distance to any seed (for stable ordering) + nearest = D.min(axis=1) # find the shortest distance to a cluster for each facility + order = np.argsort(nearest) # sort all facilities in ascending order of their distance from the nearest facility + + cap_left = {j: capacity for j in range(len(seeds))} # the capacity left for each seed + assign = {} + + for idx in order: + f = facs[idx] + # try seeds in ascending distance + seq = np.argsort(D[idx, :]) # the sequence of seeds most suitable for idx + placed = False + for j in seq: + if cap_left[j] > 0: + assign[f] = j + cap_left[j] -= 1 + placed = True + break + if not placed: + # total capacity >= n, so this should be rare; if it happens, put in least-loaded seed + j = min(cap_left, key=lambda jj: cap_left[jj]) + assign[f] = j + cap_left[j] -= 1 + + return assign + +def capacity_clusters_for_district( + T_d: pd.DataFrame, cluster_size: int = 5, big: float = 1e9, refine_swaps: int = 0 +) -> Dict[str, str]: + """ + Build ~equal-size clusters (size<=cluster_size) from a district's travel-time matrix via + capacity-constrained k-medoids (farthest-first seeds + greedy capacity assignment). + + Returns: {facility_id -> cluster_id} (cluster ids like 'C00','C01',...) + """ + facs = T_d.index.tolist() + n = len(facs) + if n == 0: + return {} + if n <= cluster_size: + return {f: "C00" for f in facs} + + k = ceil(n / cluster_size) + seeds = _farthest_first_seeds(T_d, k=k, big=big) + assign_seed_idx = _assign_to_cluster_with_fixed_capacity(T_d, seeds=seeds, capacity=cluster_size, big=big) + + # Optional tiny refinement: (off by default) + # You could add 1–2 passes of medoid swap within clusters to reduce intra-cluster travel. 
+ + # Build cluster ids in seed order + seed_to_cid = {j: f"C{j:02d}" for j in range(len(seeds))} + return {f: seed_to_cid[assign_seed_idx[f]] for f in facs} + +def build_capacity_clusters_all( + T_by_dist: Dict[str, pd.DataFrame], cluster_size: int = 5 +) -> pd.Series: + """ + Apply capacity clustering to all districts. + Args: + T_by_dist: {'DistrictName': time_matrix (minutes, square DF with facility ids)} + cluster_size: desired max cluster size (e.g., 5) + + Returns: + pd.Series mapping facility_id -> cluster_id names scoped by district, e.g. 'Nkhotakota#C03' + """ + mappings = [] + for d, T_d in T_by_dist.items(): + if T_d is None or T_d.empty: + continue + local_map = capacity_clusters_for_district(T_d, cluster_size=cluster_size) + if not local_map: + continue + s = pd.Series(local_map, name="cluster_id") + s = s.map(lambda cid: f"{d}#{cid}") # scope cluster name by district + mappings.append(s) + if not mappings: + return pd.Series(dtype=object) + return pd.concat(mappings) + # ----------------------------------------------- # 3) LP/MILP Redistribution # ----------------------------------------------- @@ -512,7 +633,9 @@ def redistribute_pooling_lp( close_cols=("closing_bal","received","dispensed"), keep_baseline_for_amc0: bool = True, # leave baseline availability where AMC≈0 amc_eps: float = 1e-6, # threshold to treat AMC as "zero" - return_move_log: bool = True # return a detailed df showing net movement of consumables after redistribution + return_move_log: bool = True, # return a detailed df showing net movement of consumables after redistribution + pooling_level: str = "district", # "district" or "cluster" + cluster_map: pd.Series | None = None # required if pooling_level=="cluster"; this specifes which cluster each facility belongs to ) -> pd.DataFrame: """ Scenario 3: district-level pooling/push . @@ -523,12 +646,21 @@ def redistribute_pooling_lp( - optional 'excess' sink if total stock > sum ceilings - availability computed safely; AMC≈0 rows keep baseline (optional) + Pooling redistribution that can operate at the district level (default) + or within fixed-size clusters inside districts. + + If pooling_level == "cluster", you must pass cluster_map: Series mapping facility_id -> cluster_id + (cluster ids should already be district-scoped, e.g., "Dedza#C01"). 
+ Returns: - out: original df plus columns: OB, OB_prime, available_prop_redis, received_from_pool where received_from_pool = OB_prime - OB (pos=received, neg=donated) - (optional) move_log: per (district, month, item, facility) net movement summary """ + if pooling_level not in ("district", "cluster"): + raise ValueError("pooling_level must be 'district' or 'cluster'.") + closing_bal, received, dispensed = close_cols out = df.copy() @@ -542,147 +674,140 @@ def redistribute_pooling_lp( # Default (will overwrite per group) out["OB_prime"] = out["OB"] + # Attach cluster if needed + if pooling_level == "cluster": + if cluster_map is None: + raise ValueError("cluster_map is required when pooling_level='cluster'.") + # cluster_map: index = facility_id (facility_col), value = cluster_id (already district-scoped) + out = out.merge( + cluster_map.rename("cluster_id"), + how="left", + left_on=facility_col, + right_index=True, + ) + if out["cluster_id"].isna().any(): + # facilities missing a cluster—assign singleton clusters to keep them + out["cluster_id"] = out["cluster_id"].fillna( + out["district"].astype(str) + "#CXX_" + out[facility_col].astype(str)) + group_cols = list(id_cols) + node_label = "district" + if pooling_level == "cluster": + group_cols = ["cluster_id", "month", "item_code"] + node_label = "cluster_id" move_rows = [] # optional movement log - for (d, m, i), g in out.groupby(group_cols, sort=False): + for keys, g in out.groupby(group_cols, sort=False): g = g.copy() + node_val, m, i = keys if pooling_level == "cluster" else (g["district"].iloc[0], keys[1], keys[2]) - # Clean AMC/OB and collapse duplicate facilities (unique index is required) + # Clean AMC/OB and dedupe AMC = (g.set_index(facility_col)[amc_col] - .astype(float) - .replace([np.inf, -np.inf], np.nan) - .fillna(0.0)) + .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) OB0 = (g.set_index(facility_col)["OB"] - .astype(float) - .replace([np.inf, -np.inf], np.nan) - .fillna(0.0)) - + .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) if AMC.index.duplicated().any(): AMC = AMC.groupby(level=0).first() if OB0.index.duplicated().any(): OB0 = OB0.groupby(level=0).sum() - facs_all = AMC.index.tolist() total_stock = float(OB0.sum()) if total_stock <= 1e-9: continue - # Positive-demand decision set + # Participants mask_pos = AMC >= amc_eps facs_pos = AMC.index[mask_pos].tolist() - if len(facs_pos) == 0: - # No demand nodes to allocate to; leave OB as-is (or zero them if you prefer) + if not facs_pos: continue AMC_pos = AMC.loc[facs_pos] - # Floors/ceilings lb = (tau_min * AMC_pos).astype(float) ub = (tau_max * AMC_pos).astype(float) - - sum_lb = float(lb.sum()) + sum_lb = float(lb.sum()); sum_ub = float(ub.sum()) - # Feasibility: scale floors if not enough stock + # Feasibility fixes if total_stock + 1e-9 < sum_lb: scale = max(1e-9, total_stock / max(1e-9, sum_lb)) lb = lb * scale sum_lb = float(lb.sum()) - - # If stock exceeds ceilings, allow an excess sink to absorb leftovers allow_excess_sink = total_stock > sum_ub + 1e-9 # ---------- LP ---------- - prob = LpProblem(f"Pooling_{d}_{m}_{i}", LpMaximize) - - # Decision variables - x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos} # how much stock to allocation to facility f - p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos} # Availability probability proxy for facility f + prob = LpProblem(f"Pooling_{node_val}_{m}_{i}", LpMaximize) + x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos} + p = {f: LpVariable(f"p_{f}", 
lowBound=0, upBound=1) for f in facs_pos} excess = LpVariable("excess", lowBound=0) if allow_excess_sink else None - # Objective: maximize total availability - prob += lpSum(p.values()) # prioritize broad coverage—get as many facilities as possible to have stock + prob += lpSum(p.values()) # maximize total availability - # Conservation if excess is None: prob += lpSum(x.values()) == total_stock else: prob += lpSum(x.values()) + excess == total_stock - # Floors/ceilings (use scalar values to avoid float(Series) errors) for f in facs_pos: prob += x[f] >= float(lb.loc[f]) prob += x[f] <= float(ub.loc[f]) - - # Linearization: AMC_f * p_f <= x_f (only for AMC>=amc_eps), i.e p = x/AMC - for f in facs_pos: prob += float(AMC_pos.loc[f]) * p[f] <= x[f] - # Solve prob.solve() if LpStatus[prob.status] != "Optimal": continue - # Apply solution + # Apply solution to OB' x_sol = {f: float(value(var) or 0.0) for f, var in x.items()} - sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i)) # selection mask - # boolean filter that picks out the subset of rows in out corresponding to the current group (district, month, item) - # Map allocation to rows with positive demand - mask_rows_pos = sel & out[facility_col].isin(facs_pos) # only those facilities with a positive demand/AMC - out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values # assign new opening balance + # selection mask for this node-month-item + if pooling_level == "district": + sel = (out["district"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i)) + else: + sel = (out["cluster_id"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i)) + + mask_rows_pos = sel & out[facility_col].isin(facs_pos) + out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values - # Rows with AMC= amc_eps - p_mech = np.zeros(len(out), dtype=float) # Prepare an output array for the new availability (available_prop_redis), initialized to 0 for all rows. 
-    # Mechanistic availability for positive-demand rows
+    p_mech = np.zeros(len(out), dtype=float)
     denom = np.maximum(amc_eps, amc_safe[pos_mask].to_numpy())
     p_mech[pos_mask.values] = np.minimum(
         1.0, np.maximum(0.0, out.loc[pos_mask, "OB_prime"].to_numpy() / denom)
     )
 
     if keep_baseline_for_amc0:
         base = (out.loc[~pos_mask, "available_prop"]
                 .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)).to_numpy()
         p_mech[~pos_mask.values] = base
     else:
         p_mech[~pos_mask.values] = (out.loc[~pos_mask, "OB_prime"].to_numpy() > 0.0).astype(float)
 
     out["available_prop_redis"] = p_mech
     out["received_from_pool"] = out["OB_prime"] - out["OB"]
 
     if return_move_log:
         move_log = pd.DataFrame(move_rows)
         return out, move_log
 
     return out
 
 # pooled_df, pool_moves = redistribute_pooling_lp(lmis, tau_min=0.25, tau_max=3.0, return_move_log=True)
 
@@ -812,14 +937,40 @@ def run_redistribution_scenarios(
 # A manual check shows that for distances greater than 60 minutes ORS underestimates the travel time a little
 # TODO consider using google maps API
 
-# 2) Run Scenario 1 (1-hour radius)
-
 #Drop NAs
 # TODO find a more generalisable solution for this issue (within the optimisation functions)
 #lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())]
 
 #lmis = lmis[lmis.district == 'Lilongwe']
 
+# Trying the updated pool-based redistribution
+# 1) Build clusters from the per-district travel-time matrices (minutes)
+#    T_car: {"District A": DF(index=fac_ids, cols=fac_ids), ...}
+cluster_size = 3
+cluster_series = build_capacity_clusters_all(T_car, cluster_size=cluster_size)
+# cluster_series is a pd.Series: index=facility_id, value like "District A#C00", "District A#C01", ...
+
+# 2) Pool at the cluster level
+pooled_df, cluster_moves = redistribute_pooling_lp(
+    df=lmis,  # the LMIS dataframe
+    tau_min=0.25, tau_max=3.0,
+    pooling_level="cluster",
+    cluster_map=cluster_series,
+    return_move_log=True,
+)
+pooled_df.to_csv(outputfilepath / 'clustering_n3_df.csv', index=False)
+
+# 3) Pool at the full district level (upper bound)
+pooled_district_df, district_moves = redistribute_pooling_lp(
+    df=lmis,
+    tau_min=0.25, tau_max=3.0,
+    pooling_level="district",
+    return_move_log=True,
+)
+pooled_district_df.to_csv(outputfilepath / 'clustering_district_df.csv', index=False)
+
+# 4) Run Scenario 1 (30-minute radius)
+
 tlo_30 = run_redistribution_scenarios(
     lmis,
     scenario="radius_30",
@@ -887,4 +1038,3 @@ def run_redistribution_scenarios(
 comparison_df['available_prop_60mins'].mean(), comparison_df['available_prop_pooling'].mean())
 
 # TODO keep only government facilities?
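+# A self-contained toy of the pooling LP above (hedged sketch, synthetic numbers):
+# availability p_f = min(1, x_f / AMC_f) is linearised as AMC_f * p_f <= x_f with p_f in [0, 1],
+# so the solver fills low-AMC facilities first because they buy availability cheapest.
+toy = LpProblem("pooling_toy", LpMaximize)
+amc_toy = {"fac_a": 10.0, "fac_b": 5.0, "fac_c": 2.0}  # hypothetical AMCs
+x_toy = {f: LpVariable(f"x_{f}", lowBound=0) for f in amc_toy}
+p_toy = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in amc_toy}
+toy += lpSum(p_toy.values())                # objective: total availability
+toy += lpSum(x_toy.values()) == 12.0        # conservation of the pooled stock
+for f, a in amc_toy.items():
+    toy += a * p_toy[f] <= x_toy[f]         # linearised p <= x/AMC
+toy.solve(PULP_CBC_CMD(msg=False))
+# Expected optimum: p_c = p_b = 1 (x_c = 2, x_b = 5), remainder x_a = 5 so p_a = 0.5
+print({f: value(v) for f, v in p_toy.items()})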
- From 8b5542764ec98a3f7da11a27d40f51583aa9fc63 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 20 Oct 2025 16:46:28 +0100 Subject: [PATCH 07/16] plot stock adequacy by district and month --- ...ate_consumable_redistribution_scenarios.py | 140 ++++++++++++++++-- 1 file changed, 126 insertions(+), 14 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 9b0feeafa5..ef3910232e 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -68,8 +68,126 @@ #lmis = lmis[lmis["Facility_Level"] == '1a'] # ----------------------------------- -# 1) Estimate travel time matrix +# 1) Data exploration # ----------------------------------- +def compute_opening_balance(df: pd.DataFrame) -> pd.Series: + """ + Opening balance from same-month records: + OB = closing_bal - received + dispensed. + This is equivalent to OB_(m) = CB_(m-1) + """ + return df["closing_bal"] - df["received"] + df["dispensed"] + +def generate_stock_adequacy_heatmap( + df: pd.DataFrame, + figures_path: Path = Path("figures"), + filename: str = "heatmap_adequacy_opening_vs_3xamc.png", + include_missing_as_fail: bool = False, # if True, items with NaN opening/amc count as NOT adequate + as_pct: bool = True, + amc_threshold: float = 3.0, + decimals: int = 0, + cmap: str = "RdYlGn", + figsize=(14, 10), + xtick_rotation: int = 45, + ytick_rotation: int = 0, +): + """ + Heatmap values: for each (month, district), the % of item_code groups where + sum(opening_balance over Facility_ID) >= 3 * sum(amc over Facility_ID). 
+ """ + + df = df.copy() + df["opening_bal"] = compute_opening_balance(df).replace([np.inf, -np.inf], np.nan).fillna(0.0) + + # ---- 1) Make sure 'month' is sortable chronologically ---- + # ---- 2) Aggregate to (month, district, item_code) over facilities ---- + agg = ( + df.groupby(["month", "district", "item_code"], dropna=False) + .agg(opening_bal=("opening_bal", "sum"), + amc=("amc", "sum")) + .reset_index() + ) + + # ---- 3) Adequacy indicator per (month, district, item_code) ---- + if include_missing_as_fail: + # NaNs treated as fail -> fill with NaN-safe compare: set False when either missing + adequate = (agg["opening_bal"].fillna(np.nan) >= amc_threshold * agg["amc"].fillna(np.nan)) & \ + agg[["opening_bal", "amc"]].notna().all(axis=1) + else: + valid = agg.dropna(subset=["opening_bal", "amc"]) + adequate = pd.Series(False, index=agg.index) + adequate.loc[valid.index] = valid["opening_bal"] >= amc_threshold * valid["amc"] + + agg["adequate"] = adequate.astype(int) # 1/0 + + # ---- 4) % adequate per (month, district) across item_code ---- + # Denominator = number of item_code present after NaN handling + if include_missing_as_fail: + denom = agg.groupby(["month", "district"])["item_code"].nunique() + numer = agg.groupby(["month", "district"])["adequate"].sum() + else: + valid_mask = agg[["opening_bal", "amc"]].notna().all(axis=1) + denom = agg[valid_mask].groupby(["month", "district"])["item_code"].nunique() + numer = agg[valid_mask].groupby(["month", "district"])["adequate"].sum() + + pct = (numer / denom * 100).replace([np.inf, -np.inf], np.nan) + + # Bring to a DataFrame for pivoting + pct_df = pct.reset_index(name="pct_adequate") + + # ---- 5) Pivot: districts (rows) x months (columns) ---- + # Sort months by _month_sort and use _month_label as the displayed column name + month_order = ( + pct_df[["month"]] + .drop_duplicates() + .sort_values("month") + ["month"] + .tolist() + ) + heatmap_df = pct_df.pivot(index="district", columns="month", values="pct_adequate") + heatmap_df = heatmap_df.reindex(columns=month_order) + + # Optional rounding for nicer colorbar ticks (doesn't affect color) + if decimals is not None: + heatmap_df = heatmap_df.round(decimals) + + # ---- 6) Plot heatmap ---- + sns.set(font_scale=1.0) + fig, ax = plt.subplots(figsize=figsize) + + hm = sns.heatmap( + heatmap_df, + cmap=cmap, + cbar_kws={"label": f"% of consumables with Opening Balance ≥ {amc_threshold} × AMC"}, + ax=ax, + annot=True, annot_kws={"size": 12}, + vmin = 0, vmax = 100) + + ax.set_xlabel("Month") + ax.set_ylabel("District") + ax.set_xticklabels(ax.get_xticklabels(), rotation=xtick_rotation) + ax.set_yticklabels(ax.get_yticklabels(), rotation=ytick_rotation) + + # Keep colorbar ticks plain (no scientific notation) + try: + cbar_ax = ax.figure.axes[-1] + cbar_ax.ticklabel_format(style="plain") + except Exception: + pass + + # ---- 7) Save & return ---- + figures_path.mkdir(parents=True, exist_ok=True) + outpath = figures_path / filename + plt.savefig(outpath, dpi=300, bbox_inches="tight") + plt.show() + plt.close(fig) + + return fig, ax, heatmap_df + +# ----------------------------------- +# 2) Estimate travel time matrix +# ----------------------------------- + def _chunk_indices(n: int, chunk: int): """Yield (start, end) index pairs for chunking 0..n-1.""" for s in range(0, n, chunk): @@ -242,17 +360,8 @@ def build_time_matrices_by_district( return matrices # ----------------------------------------------- -# 2) Data prep for redistribution linear program +# 3) Data prep for redistribution 
linear program # ----------------------------------------------- -def compute_opening_balance(df: pd.DataFrame) -> pd.Series: - """ - Opening balance from same-month records: - OB = closing_bal - received + dispensed. - This is equivalent to OB_(m) = CB_(m-1) - """ - return df["closing_bal"] - df["received"] + df["dispensed"] - - def presumed_availability(ob, amc, eps=1e-9) -> float: """ Presumed likelihood of cons availability = p = min(1, OB/AMC) at month start (no additional receipts considered @@ -330,7 +439,7 @@ def _farthest_first_seeds(T: pd.DataFrame, k: int, big: float = 1e9) -> list: # Assign each facility to its nearest seed subject to a hard cluster capacity (≤ X members) def _assign_to_cluster_with_fixed_capacity(T: pd.DataFrame, seeds: list, capacity: int, big: float = 1e9) -> Dict[str, int]: """ - Greedy assignment of facilities to nearest seed that still has capacity. + Greedy assignment of facilities to nearest seed that still has capacity (based on maximum cluster size). Returns: mapping facility -> seed_index (position in seeds list). """ facs = T.index.tolist() @@ -701,7 +810,6 @@ def redistribute_pooling_lp( g = g.copy() node_val, m, i = keys if pooling_level == "cluster" else (g["district"].iloc[0], keys[1], keys[2]) - # Clean AMC/OB and dedupe AMC = (g.set_index(facility_col)[amc_col] .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) OB0 = (g.set_index(facility_col)["OB"] @@ -803,7 +911,7 @@ def redistribute_pooling_lp( else: p_mech[~pos_mask.values] = (out.loc[~pos_mask, "OB_prime"].to_numpy() > 0.0).astype(float) - out["available_prop_redis"] = p_mech + out["available_prop_redis"] = np.maximum(p_mech,out["available_prop"]) out["received_from_pool"] = out["OB_prime"] - out["OB"] if return_move_log: @@ -902,6 +1010,10 @@ def run_redistribution_scenarios( # osrm_base_url="https://router.project-osrm.org", # max_chunk=50) +# Plot stock adequacy by district and month +fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, amc_threshold = 3, filename = "mth_district_stock_adequacy_3amc.png") +fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, amc_threshold = 2, filename = "mth_district_stock_adequacy_2amc.png") + # Store dictionary in pickle format #with open(outputfilepath / "T_car.pkl", "wb") as f: # pickle.dump(T_car, f) From 9f0673f444b7f82a9d041e2ffccbac8984349499 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 20 Oct 2025 17:39:09 +0100 Subject: [PATCH 08/16] add functionality to heatmap function (less than AMC or greater than AMC) + Add average row and column --- ...ate_consumable_redistribution_scenarios.py | 114 +++++++++++++----- 1 file changed, 86 insertions(+), 28 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index ef3910232e..8857725e05 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -1,6 +1,7 @@ import datetime from pathlib import Path import pickle +import calendar import matplotlib.pyplot as plt import numpy as np @@ -82,14 +83,18 @@ def generate_stock_adequacy_heatmap( 
df: pd.DataFrame, figures_path: Path = Path("figures"), filename: str = "heatmap_adequacy_opening_vs_3xamc.png", + y_var: str = "district", # the variable on the y-axis of the heatmap + value_var: str = "item_code", # the count variable on the basis of which the values are calculated + value_label: str = "", # label describing the values in the heatmap include_missing_as_fail: bool = False, # if True, items with NaN opening/amc count as NOT adequate - as_pct: bool = True, amc_threshold: float = 3.0, + compare: str = "ge" , # "ge" for >= threshold*AMC, "le" for <= threshold*AMC decimals: int = 0, cmap: str = "RdYlGn", - figsize=(14, 10), + figsize= None, xtick_rotation: int = 45, ytick_rotation: int = 0, + annotation: bool = True, ): """ Heatmap values: for each (month, district), the % of item_code groups where @@ -99,10 +104,16 @@ def generate_stock_adequacy_heatmap( df = df.copy() df["opening_bal"] = compute_opening_balance(df).replace([np.inf, -np.inf], np.nan).fillna(0.0) - # ---- 1) Make sure 'month' is sortable chronologically ---- + # --- 1. Ensure month is int and build label --- + df["month"] = pd.to_numeric(df["month"], errors="coerce").astype("Int64") + df = df.dropna(subset=["month"]) + df["month"] = df["month"].astype(int) + + df["_month_label"] = df["month"].map(lambda m: calendar.month_abbr[m]) + # ---- 2) Aggregate to (month, district, item_code) over facilities ---- agg = ( - df.groupby(["month", "district", "item_code"], dropna=False) + df.groupby(["month", "_month_label", y_var, value_var], dropna=False) .agg(opening_bal=("opening_bal", "sum"), amc=("amc", "sum")) .reset_index() @@ -111,60 +122,85 @@ def generate_stock_adequacy_heatmap( # ---- 3) Adequacy indicator per (month, district, item_code) ---- if include_missing_as_fail: # NaNs treated as fail -> fill with NaN-safe compare: set False when either missing - adequate = (agg["opening_bal"].fillna(np.nan) >= amc_threshold * agg["amc"].fillna(np.nan)) & \ - agg[["opening_bal", "amc"]].notna().all(axis=1) + ok = agg[["opening_bal", "amc"]].notna().all(axis=1) + left = agg["opening_bal"] + right = amc_threshold * agg["amc"] + if compare == "le": + cond = (left <= right) & ok + else: # default to ">=" + cond = (left >= right) & ok else: valid = agg.dropna(subset=["opening_bal", "amc"]) - adequate = pd.Series(False, index=agg.index) - adequate.loc[valid.index] = valid["opening_bal"] >= amc_threshold * valid["amc"] + cond = pd.Series(False, index=agg.index) + left = valid["opening_bal"] + right = amc_threshold * valid["amc"] + if compare == "le": + cond.loc[valid.index] = left <= right + else: + cond.loc[valid.index] = left >= right - agg["adequate"] = adequate.astype(int) # 1/0 + agg["condition_met"] = cond.astype(int) - # ---- 4) % adequate per (month, district) across item_code ---- - # Denominator = number of item_code present after NaN handling + # --- % meeting condition per (month, district) across item_code --- if include_missing_as_fail: - denom = agg.groupby(["month", "district"])["item_code"].nunique() - numer = agg.groupby(["month", "district"])["adequate"].sum() + denom = agg.groupby(["month", "_month_label", y_var])[value_var].nunique() + numer = agg.groupby(["month", "_month_label", y_var])["condition_met"].sum() else: valid_mask = agg[["opening_bal", "amc"]].notna().all(axis=1) - denom = agg[valid_mask].groupby(["month", "district"])["item_code"].nunique() - numer = agg[valid_mask].groupby(["month", "district"])["adequate"].sum() - - pct = (numer / denom * 100).replace([np.inf, -np.inf], np.nan) + denom = 
agg[valid_mask].groupby(["month", "_month_label", y_var])[value_var].nunique()
+        numer = agg[valid_mask].groupby(["month", "_month_label", y_var])["condition_met"].sum()
 
-    pct = (numer / denom * 100).replace([np.inf, -np.inf], np.nan)
-
-    # Bring to a DataFrame for pivoting
-    pct_df = pct.reset_index(name="pct_adequate")
+    pct = (numer / denom * 100).replace([np.inf, -np.inf], np.nan).reset_index(name="pct_meeting")
 
     # ---- 5) Pivot: districts (rows) x months (columns) ----
     # Sort months by _month_sort and use _month_label as the displayed column name
     month_order = (
-        pct_df[["month"]]
+        pct[["month", "_month_label"]]
         .drop_duplicates()
         .sort_values("month")
-        ["month"]
+        ["_month_label"]
         .tolist()
     )
-    heatmap_df = pct_df.pivot(index="district", columns="month", values="pct_adequate")
+    heatmap_df = pct.pivot(index=y_var, columns="_month_label", values="pct_meeting")
     heatmap_df = heatmap_df.reindex(columns=month_order)
 
+    # --- Add average row and column ---
+    # Column average (mean of each month)
+    heatmap_df.loc["Average"] = heatmap_df.mean(axis=0)
+    # Row average (mean of each y_var)
+    heatmap_df["Average"] = heatmap_df.mean(axis=1)
+    # The bottom-right cell now holds the mean of the monthly averages, i.e. the overall average
+
     # Optional rounding for nicer colorbar ticks (doesn't affect color)
     if decimals is not None:
         heatmap_df = heatmap_df.round(decimals)
 
+    # --- Dynamic figure size ---
+    if figsize is None:
+        n_rows = len(heatmap_df)
+        n_cols = len(heatmap_df.columns)
+        height = max(6, n_rows * 0.2)  # taller if many rows
+        width = max(8, n_cols * 0.6)
+        figsize = (width, height)
+
     # ---- 6) Plot heatmap ----
     sns.set(font_scale=1.0)
     fig, ax = plt.subplots(figsize=figsize)
 
     hm = sns.heatmap(
         heatmap_df,
         cmap=cmap,
-        cbar_kws={"label": f"% of consumables with Opening Balance ≥ {amc_threshold} × AMC"},
+        cbar_kws={"label": value_label},
         ax=ax,
-        annot=True, annot_kws={"size": 12},
+        annot=True, annot_kws={"size": 10},
         vmin = 0, vmax = 100)
 
     ax.set_xlabel("Month")
-    ax.set_ylabel("District")
+    ax.set_ylabel(y_var)
     ax.set_xticklabels(ax.get_xticklabels(), rotation=xtick_rotation)
     ax.set_yticklabels(ax.get_yticklabels(), rotation=ytick_rotation)
@@ -1011,9 +1047,31 @@ def run_redistribution_scenarios(
 # Plot stock adequacy by district and month
-fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, amc_threshold = 3, filename = "mth_district_stock_adequacy_3amc.png")
-fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, amc_threshold = 2, filename = "mth_district_stock_adequacy_2amc.png")
-
+fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath,
+                                                 y_var = 'district', value_var = 'item_code',
+                                                 value_label = "% of consumables with Opening Balance ≥ 3 × AMC",
+                                                 amc_threshold = 3, compare = "ge",
+                                                 filename = "mth_district_stock_adequacy_3amc.png", figsize = (12,10))
+fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath,
+                                                 y_var = 'district', value_var = 'item_code',
+                                                 value_label = "% of consumables with Opening Balance ≥ 2 × AMC",
+                                                 amc_threshold = 2, compare = "ge",
+                                                 filename = "mth_district_stock_adequacy_2amc.png", figsize = (12,10))
+fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath,
+                                                 y_var = 'district', value_var = 'item_code',
+                                                 value_label = "% of consumables with Opening Balance ≤ 1 × AMC",
+                                                 amc_threshold = 1, compare = "le",
+                                                 filename = 
"mth_district_stock_inadequacy_1amc.png", figsize = (12,10)) +fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, + y_var = 'item_code', value_var = 'fac_name', + value_label= f"% of facilities with Opening Balance ≥ 3 × AMC", + amc_threshold = 3, compare = "ge", + filename = "mth_item_stock_adequacy_3amc.png") +fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, + y_var = 'item_code', value_var = 'fac_name', + value_label= f"% of facilities with Opening Balance <= 1 × AMC", + amc_threshold = 1, compare = "le", + filename = "mth_item_stock_inadequacy_1amc.png") # Store dictionary in pickle format #with open(outputfilepath / "T_car.pkl", "wb") as f: # pickle.dump(T_car, f) From 31d2c879ae89897d2496012eaedde3cd869cbc38 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 21 Oct 2025 10:54:58 +0100 Subject: [PATCH 09/16] add constraints to pooling algorithm - 1. only levels 1a and 1b can receive 2. facilities only transfer stock that they have beyond 3 X AMC --- ...ate_consumable_redistribution_scenarios.py | 149 +++++++++++++----- 1 file changed, 112 insertions(+), 37 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 8857725e05..1165c8abe9 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -774,13 +774,16 @@ def redistribute_pooling_lp( tau_max: float = 3.0, # upper ceiling (storage/policy max) id_cols=("district","month","item_code"), facility_col="fac_name", + level_col="Facility_Level", amc_col="amc", close_cols=("closing_bal","received","dispensed"), keep_baseline_for_amc0: bool = True, # leave baseline availability where AMC≈0 amc_eps: float = 1e-6, # threshold to treat AMC as "zero" return_move_log: bool = True, # return a detailed df showing net movement of consumables after redistribution pooling_level: str = "district", # "district" or "cluster" - cluster_map: pd.Series | None = None # required if pooling_level=="cluster"; this specifes which cluster each facility belongs to + cluster_map: pd.Series | None = None, # required if pooling_level=="cluster"; this specifes which cluster each facility belongs to + enforce_no_harm: bool = True, # if True, enforce p_f >= baseline at each facility (hard constraint) + floor_to_baseline: bool = True # if True, report availability floored to baseline (no decrease in outputs) ) -> pd.DataFrame: """ Scenario 3: district-level pooling/push . 
@@ -810,11 +813,7 @@ def redistribute_pooling_lp( out = df.copy() # Safe opening balance - out["OB"] = ( - out[closing_bal].astype(float) - - out[received].astype(float) - + out[dispensed].astype(float) - ).replace([np.inf, -np.inf], np.nan).fillna(0.0) + out["OB"] = compute_opening_balance(out).replace([np.inf, -np.inf], np.nan).fillna(0.0) # Default (will overwrite per group) out["OB_prime"] = out["OB"] @@ -841,17 +840,33 @@ def redistribute_pooling_lp( group_cols = ["cluster_id", "month", "item_code"] node_label = "cluster_id" move_rows = [] # optional movement log + # TODO could remove the movement log for keys, g in out.groupby(group_cols, sort=False): g = g.copy() - node_val, m, i = keys if pooling_level == "cluster" else (g["district"].iloc[0], keys[1], keys[2]) + # Resolve node ID for logging and selection masks + if pooling_level == "district": + node_val, m, i = g["district"].iloc[0], keys[1], keys[2] + else: + node_val, m, i = keys + # Build per-facility Series (unique index) for AMC, OB, Level, Baseline p AMC = (g.set_index(facility_col)[amc_col] .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) OB0 = (g.set_index(facility_col)["OB"] .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) + LVL = (g.set_index(facility_col)[level_col].astype(str) + .replace({np.nan: ""})) + Pbase = (g.set_index(facility_col)["available_prop"] + .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) + + # collapse duplicates if any if AMC.index.duplicated().any(): - AMC = AMC.groupby(level=0).first() + AMC = AMC[~AMC.index.duplicated(keep="first")] + if LVL.index.duplicated().any(): + LVL = LVL[~LVL.index.duplicated(keep="first")] + if Pbase.index.duplicated().any(): + Pbase = Pbase.groupby(level=0).mean() # average baseline if duplicates if OB0.index.duplicated().any(): OB0 = OB0.groupby(level=0).sum() @@ -859,44 +874,87 @@ def redistribute_pooling_lp( if total_stock <= 1e-9: continue - # Participants + # Participants (positive demand) mask_pos = AMC >= amc_eps facs_pos = AMC.index[mask_pos].tolist() if not facs_pos: + # nothing to reallocate to; they will be donors only (handled by OB' defaults) continue AMC_pos = AMC.loc[facs_pos] - lb = (tau_min * AMC_pos).astype(float) - ub = (tau_max * AMC_pos).astype(float) - sum_lb = float(lb.sum()); - sum_ub = float(ub.sum()) - - # Feasibility fixes - if total_stock + 1e-9 < sum_lb: - scale = max(1e-9, total_stock / max(1e-9, sum_lb)) - lb = lb * scale - sum_lb = float(lb.sum()) - allow_excess_sink = total_stock > sum_ub + 1e-9 + OB0_pos = OB0.loc[facs_pos] + LVL_pos = LVL.reindex(facs_pos) + Pbase_pos = Pbase.reindex(facs_pos).fillna(0.0) + + # policy floors/ceilings (raw) + tau_min_floor = (tau_min * AMC_pos).astype(float) + tau_max_ceiling = (tau_max * AMC_pos).astype(float) + + # ---- Donor protection: x_f >= pLB_f = min(OB, 3*AMC) + pLB = np.minimum(OB0_pos, 3.0 * AMC_pos) # Series + + # ---- Receiver eligibility: only 1a/1b may increase above OB + eligible_mask = LVL_pos.isin(["1a", "1b"]) + # For eligible: UB = tau_max*AMC + # For ineligible: UB = min(OB, tau_max*AMC) (can donate, cannot receive above OB) + UB = tau_max_ceiling.copy() + UB.loc[~eligible_mask] = np.minimum(OB0_pos.loc[~eligible_mask], tau_max_ceiling.loc[~eligible_mask]) + + # ---- Lower bound assembly BEFORE scaling tau_min + LB0 = np.maximum(pLB, tau_min_floor) # candidate lower bound + + # ---- Optional "no-harm": p_f >= baseline -> x_f >= AMC_f * p_base_f + if enforce_no_harm: + no_harm_lb = (AMC_pos * Pbase_pos).astype(float) + LB0 = np.maximum(LB0, 
no_harm_lb) + + # ---- Feasibility: scale ONLY the tau_min component if needed + sum_LB0 = float(LB0.sum()) + if total_stock + 1e-9 < sum_LB0: + # We want LB = max(pLB, tau_min_scaled, [no_harm_lb]) + # Scale only the portion of tau_min_floor that lies above pLB (and above no_harm if on). + base_guard = pLB.copy() + if enforce_no_harm: + base_guard = np.maximum(base_guard, (AMC_pos * Pbase_pos)) + + # Decompose tau_min contribution above base_guard + tau_min_above = np.maximum(0.0, tau_min_floor - np.minimum(tau_min_floor, base_guard)) + need = float(tau_min_above.sum()) + # Available budget for that component: + budget = total_stock - float(base_guard.sum()) + scale = 0.0 if need <= 1e-12 else max(0.0, min(1.0, budget / max(1e-9, need))) + + tau_min_scaled = np.minimum(base_guard, tau_min_floor) + tau_min_above * scale + LB = np.maximum(base_guard, tau_min_scaled) + else: + LB = LB0 + + # ---- Excess sink if ceilings bind + sum_UB = float(UB.sum()) + allow_excess_sink = total_stock > sum_UB + 1e-9 # ---------- LP ---------- prob = LpProblem(f"Pooling_{node_val}_{m}_{i}", LpMaximize) - x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos} p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos} excess = LpVariable("excess", lowBound=0) if allow_excess_sink else None - prob += lpSum(p.values()) # maximize total availability + # Objective: maximize total availability + prob += lpSum(p.values()) + # Conservation if excess is None: prob += lpSum(x.values()) == total_stock else: prob += lpSum(x.values()) + excess == total_stock + # Bounds + linearization for f in facs_pos: - prob += x[f] >= float(lb.loc[f]) - prob += x[f] <= float(ub.loc[f]) - prob += float(AMC_pos.loc[f]) * p[f] <= x[f] + prob += x[f] >= float(LB.loc[f]) # donor protection + tau_min (scaled) + (optional) no-harm + prob += x[f] <= float(UB.loc[f]) # eligibility-aware ceiling + prob += float(AMC_pos.loc[f]) * p[f] <= x[f] # p <= x/AMC + # Solve prob.solve() if LpStatus[prob.status] != "Optimal": continue @@ -904,15 +962,18 @@ def redistribute_pooling_lp( # Apply solution to OB' x_sol = {f: float(value(var) or 0.0) for f, var in x.items()} - # selection mask for this node-month-item + # Selection mask for writing back if pooling_level == "district": sel = (out["district"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i)) else: sel = (out["cluster_id"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i)) + # Facilities with AMC>=eps get x_f mask_rows_pos = sel & out[facility_col].isin(facs_pos) out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values + # Facilities with AMC OB' = 0 + # (this matches the "donate to pool" assumption) mask_rows_zero = sel & ~out[facility_col].isin(facs_pos) out.loc[mask_rows_zero, "OB_prime"] = 0.0 @@ -928,34 +989,48 @@ def redistribute_pooling_lp( "received_from_pool": net, "x_allocated": x_f, "OB0_agg": float(OB0.get(f, 0.0)), + "eligible_receiver": bool(LVL.get(f, "") in {"1a", "1b"}), }) - # ---------- Availability after redistribution (safe) ---------- + # ---------- Availability after redistribution ---------- amc_safe = out[amc_col].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0) - pos_mask = amc_safe >= amc_eps + pos_mask_all = amc_safe >= amc_eps p_mech = np.zeros(len(out), dtype=float) - denom = np.maximum(amc_eps, amc_safe[pos_mask].to_numpy()) - p_mech[pos_mask.values] = np.minimum( - 1.0, np.maximum(0.0, out.loc[pos_mask, "OB_prime"].to_numpy() / denom) + denom = np.maximum(amc_eps, 
@@ -928,34 +989,48 @@ def redistribute_pooling_lp(
             "received_from_pool": net,
             "x_allocated": x_f,
             "OB0_agg": float(OB0.get(f, 0.0)),
+            "eligible_receiver": bool(LVL.get(f, "") in {"1a", "1b"}),
         })
 
-    # ---------- Availability after redistribution (safe) ----------
+    # ---------- Availability after redistribution ----------
     amc_safe = out[amc_col].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)
-    pos_mask = amc_safe >= amc_eps
+    pos_mask_all = amc_safe >= amc_eps
 
     p_mech = np.zeros(len(out), dtype=float)
-    denom = np.maximum(amc_eps, amc_safe[pos_mask].to_numpy())
-    p_mech[pos_mask.values] = np.minimum(
-        1.0, np.maximum(0.0, out.loc[pos_mask, "OB_prime"].to_numpy() / denom)
+    denom = np.maximum(amc_eps, amc_safe[pos_mask_all].to_numpy())
+    p_mech[pos_mask_all.values] = np.minimum(
+        1.0, np.maximum(0.0, out.loc[pos_mask_all, "OB_prime"].to_numpy() / denom)
     )
 
     if keep_baseline_for_amc0:
-        base = (out.loc[~pos_mask, "available_prop"]
-                .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)).to_numpy()
-        p_mech[~pos_mask.values] = base
+        base = (out.loc[~pos_mask_all, "available_prop"]
+                .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)).to_numpy()
+        p_mech[~pos_mask_all.values] = base
     else:
-        p_mech[~pos_mask.values] = (out.loc[~pos_mask, "OB_prime"].to_numpy() > 0.0).astype(float)
+        p_mech[~pos_mask_all.values] = (out.loc[~pos_mask_all, "OB_prime"].to_numpy() > 0.0).astype(float)
+
+    out["available_prop_redis_raw"] = p_mech
+    out["available_prop_redis"] = (
+        np.maximum(p_mech, out["available_prop"].astype(float).values)
+        if floor_to_baseline else p_mech
+    )
 
-    out["available_prop_redis"] = np.maximum(p_mech,out["available_prop"])
+    # Per-row movement
     out["received_from_pool"] = out["OB_prime"] - out["OB"]
 
+    # Check if the rules are correctly applied
+    # No facility should end below min(OB, 3*AMC)
+    viol = (out["OB_prime"] < np.minimum(out["OB"], 3 * out["amc"]) - 1e-6)
+    assert not viol.any()
+    # Non-eligible levels never increase OB
+    mask_non_elig = ~out["Facility_Level"].isin(["1a", "1b"])
+    assert not (out.loc[mask_non_elig, "OB_prime"] > out.loc[mask_non_elig, "OB"] + 1e-6).any()
+
     if return_move_log:
         move_log = pd.DataFrame(move_rows)
         return out, move_log
     return out
 
-
 # pooled_df, pool_moves = redistribute_pooling_lp(lmis, tau_min=0.25, tau_max=3.0, return_move_log=True)

From bea8ddcae45276d34524b5e047871e6e3f466844 Mon Sep 17 00:00:00 2001
From: sakshimohan
Date: Tue, 21 Oct 2025 16:22:46 +0100
Subject: [PATCH 10/16] fixes to pooling algorithm -

- differentiate between tau_donor_keep and tau_max

---
 ...ate_consumable_redistribution_scenarios.py | 34 ++++++++++++-------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py
index 1165c8abe9..872d577ec4 100644
--- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py
+++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py
@@ -772,6 +772,7 @@ def redistribute_pooling_lp(
     df: pd.DataFrame,
     tau_min: float = 0.25,   # lower floor in "months of demand" (≈ 7–8 days) - minimum transfer required
     tau_max: float = 3.0,    # upper ceiling (storage/policy max)
+    tau_donor_keep: float = 3.0, # minimum the donor keeps before donating
     id_cols=("district","month","item_code"),
     facility_col="fac_name",
     level_col="Facility_Level",
@@ -890,8 +891,9 @@ def redistribute_pooling_lp(
         tau_min_floor = (tau_min * AMC_pos).astype(float)
         tau_max_ceiling = (tau_max * AMC_pos).astype(float)
 
-        # ---- Donor protection: x_f >= pLB_f = min(OB, 3*AMC)
-        pLB = np.minimum(OB0_pos, 3.0 * AMC_pos)  # Series
+        # ---- Donor protection: x_f >= pLB_f = min(OB, tau_donor_keep*AMC)
+        pLB = np.minimum(OB0_pos, tau_donor_keep * AMC_pos)  # a donor's lower bound is OB when OB < tau_donor_keep * AMC,
+        # and tau_donor_keep * AMC otherwise (i.e. with tau_donor_keep = 3, at most OB - 3 * AMC can be donated)
 
         # ---- Receiver eligibility: only 1a/1b may increase above OB
         eligible_mask = LVL_pos.isin(["1a", "1b"])
@@ -901,7 +903,7 @@ def redistribute_pooling_lp(
         UB.loc[~eligible_mask] = np.minimum(OB0_pos.loc[~eligible_mask], tau_max_ceiling.loc[~eligible_mask])
 
         # ---- Lower bound assembly BEFORE scaling tau_min
-        LB0 = np.maximum(pLB, tau_min_floor)  # candidate lower bound
+        LB0 = np.maximum(pLB, tau_min_floor)  # enforces receiver's lower bound
 
         # ---- Optional "no-harm": p_f >= baseline -> x_f >= AMC_f * p_base_f
         if enforce_no_harm:
@@ -955,7 +957,7 @@ def redistribute_pooling_lp(
             prob += float(AMC_pos.loc[f]) * p[f] <= x[f]  # p <= x/AMC
 
         # Solve
-        prob.solve()
+        prob.solve(PULP_CBC_CMD(msg=False, cuts=0, presolve=True, threads=1))
         if LpStatus[prob.status] != "Optimal":
             continue
@@ -996,7 +998,8 @@ def redistribute_pooling_lp(
     amc_safe = out[amc_col].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)
     pos_mask_all = amc_safe >= amc_eps
 
-    p_mech = np.zeros(len(out), dtype=float)
+    p_mech = np.zeros(len(out), dtype=float)  # mechanistically calculated p based on balances and AMC
+    # (different from the baseline p, which was calculated from the actual stkout_days recorded)
     denom = np.maximum(amc_eps, amc_safe[pos_mask_all].to_numpy())
     p_mech[pos_mask_all.values] = np.minimum(
         1.0, np.maximum(0.0, out.loc[pos_mask_all, "OB_prime"].to_numpy() / denom)
@@ -1009,22 +1012,30 @@ def redistribute_pooling_lp(
     else:
         p_mech[~pos_mask_all.values] = (out.loc[~pos_mask_all, "OB_prime"].to_numpy() > 0.0).astype(float)
 
+    # For levels 1a and 1b, force availability to be the max of old (actual) versus new (mechanistically calculated)
     out["available_prop_redis_raw"] = p_mech
     out["available_prop_redis"] = (
         np.maximum(p_mech, out["available_prop"].astype(float).values)
        if floor_to_baseline else p_mech
     )
+    # For levels other than 1a and 1b, force availability to be equal to old
+    mask_non_elig = ~out["Facility_Level"].isin(["1a", "1b"])
+    out.loc[mask_non_elig, "available_prop_redis"] = out.loc[mask_non_elig, "available_prop"]  # this should ideally happen automatically;
+    # however, there are facilities at levels 2-4 where some stock-out was experienced even though OB > AMC.
+    # We want to retain the original probability in these cases because our overall analysis is restricted to levels 1a and 1b
 
     # Per-row movement
     out["received_from_pool"] = out["OB_prime"] - out["OB"]
 
     # Check if the rules are correctly applied
-    # No facility should end below min(OB, 3*AMC)
-    viol = (out["OB_prime"] < np.minimum(out["OB"], 3 * out["amc"]) - 1e-6)
-    assert not viol.any()
-    # Non-eligible levels never increase OB
-    mask_non_elig = ~out["Facility_Level"].isin(["1a", "1b"])
-    assert not (out.loc[mask_non_elig, "OB_prime"] > out.loc[mask_non_elig, "OB"] + 1e-6).any()
+    # No facility should end below min(OB, tau_donor_keep*AMC)
+    viol_lb = out["OB_prime"] < (np.minimum(out["OB"], tau_donor_keep * out["amc"]) - 1e-6)
+    #assert not viol_lb.any(), "Donor-protection LB violated (OB' < min(OB, tau_donor_keep*AMC))."
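+    # Illustration of this check with invented numbers: a facility with OB = 10 and
+    # AMC = 2 has floor min(10, 3 * 2) = 6, so any OB' below 6 - tol is a violation;
+    # a facility with OB = 4 and AMC = 2 has floor min(4, 6) = 4 and may not fall below it.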
+ print(out[viol_lb][['available_prop_redis', 'available_prop', 'OB', 'OB_prime', 'amc']]) + # Non-eligible levels never increase prob + #mask_non_elig = ~out["Facility_Level"].isin(["1a", "1b"]) + #mask_correct = (out["available_prop_redis"] == out["available_prop"]) + #print(out[mask_non_elig & ~mask_correct][['available_prop_redis', 'available_prop', 'OB', 'OB_prime', 'amc']]) if return_move_log: move_log = pd.DataFrame(move_rows) @@ -1033,7 +1044,6 @@ def redistribute_pooling_lp( return out # pooled_df, pool_moves = redistribute_pooling_lp(lmis, tau_min=0.25, tau_max=3.0, return_move_log=True) - # ------------------------ # 4) HIGH-LEVEL DRIVER # ------------------------ From 6f1d374d8abf857094f6cb46223031e7481c9b7e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 21 Oct 2025 16:35:40 +0100 Subject: [PATCH 11/16] remove duplicated lmis rows (same fac_name and district but different levels) --- ...ate_consumable_redistribution_scenarios.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 872d577ec4..202e823b68 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -34,6 +34,34 @@ lmis = (pd.read_csv(outputfilepath / "ResourceFile_Consumables_availability_and_usage.csv") [['district', 'fac_type_tlo', 'fac_name', 'month', 'item_code', 'available_prop', 'closing_bal', 'amc', 'dispensed', 'received']]) + +# Drop duplicated facility, item, month combinations +print(lmis.shape, "rows before collapsing duplicates") +key_cols = ["district", "item_code", "fac_name", "month"] # keys that define a unique record + +# helper to keep one facility level per group (mode → most common; fallback to first non-null) +def _mode_or_first(s: pd.Series): + s = s.dropna() + if s.empty: + return np.nan + m = s.mode() + return m.iloc[0] if not m.empty else s.iloc[0] + +lmis = ( + lmis + .groupby(key_cols, as_index=False) + .agg( + closing_bal=("closing_bal", "sum"), + dispensed=("dispensed", "sum"), + received=("received", "sum"), + amc=("amc", "sum"), + available_prop=("available_prop", "mean"), + fac_type_tlo=("fac_type_tlo", _mode_or_first), # optional; remove if not needed + ) +) + +print(lmis.shape, "rows after collapsing duplicates") + # Import data on facility location location = (pd.read_excel(path_to_share / "07 - Data/Facility_GPS_Coordinates/gis_data_for_openlmis/LMISFacilityLocations_raw.xlsx") [['LMIS Facility List', 'LATITUDE', 'LONGITUDE']]) From cd4f1493a204ac3ded349cb0493b6d41d92420d8 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 21 Oct 2025 19:15:28 +0100 Subject: [PATCH 12/16] update assertions for pooling LP (to exclude the cases which were not included in the optimisation) --- ...ate_consumable_redistribution_scenarios.py | 134 +++++++++++++----- 1 file changed, 100 insertions(+), 34 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py 
b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 202e823b68..842ea2f1c0 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -4,6 +4,7 @@ import calendar import matplotlib.pyplot as plt +import seaborn as sns import numpy as np import pandas as pd @@ -871,6 +872,7 @@ def redistribute_pooling_lp( move_rows = [] # optional movement log # TODO could remove the movement log + skipped_nodes = [] # collect nodes that did NOT solve optimally for keys, g in out.groupby(group_cols, sort=False): g = g.copy() # Resolve node ID for logging and selection masks @@ -950,7 +952,7 @@ def redistribute_pooling_lp( # Decompose tau_min contribution above base_guard tau_min_above = np.maximum(0.0, tau_min_floor - np.minimum(tau_min_floor, base_guard)) need = float(tau_min_above.sum()) - # Available budget for that component: + # Available stock for that component: budget = total_stock - float(base_guard.sum()) scale = 0.0 if need <= 1e-12 else max(0.0, min(1.0, budget / max(1e-9, need))) @@ -987,7 +989,11 @@ def redistribute_pooling_lp( # Solve prob.solve(PULP_CBC_CMD(msg=False, cuts=0, presolve=True, threads=1)) if LpStatus[prob.status] != "Optimal": + skipped_nodes.append((node_val, m, i)) + #print("NO Optimal solution found", node_val, m, i) continue + #else: + #print("Optimal solution found", node_val, m, i) # Apply solution to OB' x_sol = {f: float(value(var) or 0.0) for f, var in x.items()} @@ -1056,14 +1062,48 @@ def redistribute_pooling_lp( out["received_from_pool"] = out["OB_prime"] - out["OB"] # Check if the rules are correctly applied - # No facility should end below min(OB, tau_donor_keep*AMC) - viol_lb = out["OB_prime"] < (np.minimum(out["OB"], tau_donor_keep * out["amc"]) - 1e-6) - #assert not viol_lb.any(), "Donor-protection LB violated (OB' < min(OB, tau_donor_keep*AMC))." - print(out[viol_lb][['available_prop_redis', 'available_prop', 'OB', 'OB_prime', 'amc']]) - # Non-eligible levels never increase prob - #mask_non_elig = ~out["Facility_Level"].isin(["1a", "1b"]) - #mask_correct = (out["available_prop_redis"] == out["available_prop"]) - #print(out[mask_non_elig & ~mask_correct][['available_prop_redis', 'available_prop', 'OB', 'OB_prime', 'amc']]) + # --- Build masks for skipping --- + # 1. Nodes that failed to solve optimally + # Exclude any node/month/item combinations that didn't solve optimally or which were skipped due to AMC == 0 + if skipped_nodes: + skipped_df = pd.DataFrame(skipped_nodes, columns=[node_label, "month", "item_code"]) + + # Merge to flag rows belonging to skipped groups + out = out.merge( + skipped_df.assign(skip_flag=True), + on=[node_label, "month", "item_code"], + how="left", + ) + mask_skip_solution = out["skip_flag"].fillna(False) + else: + mask_skip_solution = pd.Series(False, index=out.index) + + # 2. 
Facilities with AMC effectively zero + mask_skip_amc = out["amc"].astype(float) <= 1e-9 + + # Combined skip mask + mask_skip = mask_skip_solution | mask_skip_amc + mask_solved = ~mask_skip + print(f"Skipping {mask_skip.sum()} rows due to non-optimal LPs or AMC≈0") + + # No facility should end below min(OB, tau_donor_keep*AMC) (# Lower bound check) + tol = 1e-6 #tolerance + viol_lb = ( + (out.loc[mask_solved, "OB_prime"] < + (np.minimum(out.loc[mask_solved, "OB"], tau_donor_keep * out.loc[mask_solved, "amc"]) - tol)) + ) + + # No facility ends up above upper bounds (# Upper bound check) + elig = out.loc[mask_solved, "Facility_Level"].isin(["1a", "1b"]).values + ub = np.where( + elig, + tau_max * out.loc[mask_solved, "amc"], + np.minimum(out.loc[mask_solved, "OB"], tau_max * out.loc[mask_solved, "amc"]) + ) + viol_ub = out.loc[mask_solved, "OB_prime"].values > (ub + tol) + + assert not viol_lb.any(), "viol_lb violated" + assert not viol_ub.any(), "viol_ub violated" if return_move_log: move_log = pd.DataFrame(move_rows) @@ -1159,7 +1199,19 @@ def run_redistribution_scenarios( # osrm_base_url="https://router.project-osrm.org", # max_chunk=50) -# Plot stock adequacy by district and month +# Store dictionary in pickle format +#with open(outputfilepath / "T_car.pkl", "wb") as f: +# pickle.dump(T_car, f) +# -> Commented out because it takes long to run. The result has been stored in pickle format + +# Load pre-generated dictionary +with open(outputfilepath / "T_car.pkl", "rb") as f: + T_car = pickle.load(f) + +#edges_flat = build_edges_within_radius_flat(T_car, max_minutes= 60) + +# 2) Explore the availability and distances to make decisions about optimisation rules +# Plot stock adequacy by district and month to assess what bounds to set when pooling fig, ax, hm_df = generate_stock_adequacy_heatmap(df = lmis, figures_path = outputfilepath, y_var = 'district', value_var = 'item_code', value_label= f"% of consumables with Opening Balance ≥ 3 × AMC", @@ -1185,16 +1237,7 @@ def run_redistribution_scenarios( value_label= f"% of facilities with Opening Balance <= 1 × AMC", amc_threshold = 1, compare = "le", filename = "mth_item_stock_inadequacy_1amc.png") -# Store dictionary in pickle format -#with open(outputfilepath / "T_car.pkl", "wb") as f: -# pickle.dump(T_car, f) -# -> Commented out because it takes long to run. The result has been stored in pickle format -# Load pre-generated dictionary -with open(outputfilepath / "T_car.pkl", "rb") as f: - T_car = pickle.load(f) - -#edges_flat = build_edges_within_radius_flat(T_car, max_minutes= 60) # Browse the number of eligible neighbours depending on allowable travel time results = [] @@ -1224,34 +1267,57 @@ def run_redistribution_scenarios( # TODO find a more generalisable solution for this issue (within the optimisation functions) #lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())] -#lmis = lmis[lmis.district == 'Lilongwe'] - -# Trying the updated pool-based redistribution -# 1) Build clusters from your per-district travel-time matrices (minutes) +# 3) Implement pooled redistribution +# a) Build clusters from per-district travel-time matrices # T_car_by_dist: {"District A": DF(index=fac_ids, cols=fac_ids), ...} cluster_size = 3 cluster_series = build_capacity_clusters_all(T_car, cluster_size=cluster_size) # cluster_series is a pd.Series: index=facility_id, value like "District A#C00", "District A#C01", ... 
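+# For intuition, a hand-built cluster map for four hypothetical facilities would be:
+#   cluster_series = pd.Series({"fac_A": "Balaka#C00", "fac_B": "Balaka#C00",
+#                               "fac_C": "Balaka#C01", "fac_D": "Balaka#C01"})
+# i.e. facilities sharing a label are pooled together (facility names invented here).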
-# 2) Pool at the cluster level -pooled_df, cluster_moves = redistribute_pooling_lp( - df=lmis, # your LMIS dataframe +# b) Run optimisation at district level +pooled_district_df, cluster_district_moves = redistribute_pooling_lp( + df=lmis_test, # the LMIS dataframe tau_min=0.25, tau_max=3.0, - pooling_level="cluster", - cluster_map=cluster_series, + tau_donor_keep = 3.0, + pooling_level="district", + cluster_map=None, return_move_log=True, + enforce_no_harm=True, + floor_to_baseline=True ) -pooled_df.to_csv(outputfilepath/ 'clustering_n3_df.csv', index=False) +print(pooled_district_df.groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean()) +pooled_district_df.to_csv(outputfilepath/ 'clustering_district_df.csv', index=False) +tlo_pooled_district = ( + pooled_district_df + .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False) + .agg(available_prop_scen16=("available_prop_redis", "mean")) + .sort_values(["item_code","district","Facility_Level","month"]) + ) + -# 3) Pool at the full district level (upper bound) -pooled_district_df, district_moves = redistribute_pooling_lp( - df=lmis, +# c) Run optimisation at cluster (size = 3) level +pooled_cluster_df, cluster_moves = redistribute_pooling_lp( + df=lmis, # the LMIS dataframe tau_min=0.25, tau_max=3.0, - pooling_level="district", + tau_donor_keep = 3.0, + pooling_level="cluster", + cluster_map=cluster_series, return_move_log=True, + enforce_no_harm=True, + floor_to_baseline=True ) -pooled_df.to_csv(outputfilepath/ 'clustering_district_df.csv', index=False) +print(pooled_cluster_df.groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean()) +pooled_cluster_df.to_csv(outputfilepath/ 'clustering_n3_df.csv', index=False) + +tlo_pooled_cluster = ( + pooled_district_df + .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False) + .agg(available_prop_scen17=("available_prop_redis", "mean")) + .sort_values(["item_code","district","Facility_Level","month"]) + ) + +# 3) Implement pairwise redistribution # 2) Run Scenario 1 (1-hour radius) tlo_30 = run_redistribution_scenarios( From 5f5f05aee6644a02b42800501bda9601e6c81ac8 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 22 Oct 2025 17:11:42 +0100 Subject: [PATCH 13/16] clean LMIS entries (only formatting) --- ...ate_consumable_redistribution_scenarios.py | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 842ea2f1c0..e5300cfd16 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -87,6 +87,21 @@ def _mode_or_first(s: pd.Series): lmis["month"] = lmis["month"].map(month_map) lmis["Facility_Level"] = lmis["Facility_Level"].str.replace("Facility_level_", "", regex=False) +# Clean data types before analysis +# 1) Normalize fac_name +lmis["fac_name"] = ( + lmis["fac_name"] + .astype("string") # Pandas string dtype (not just object) + .str.normalize("NFKC") # unify unicode forms + .str.strip() # trim leading/trailing spaces + .str.replace(r"\s+", "_", regex=True) # collapse 
internal whitespace runs into single underscores
+)
+
+# 2) Normalize other key columns used in grouping/joins
+lmis["item_code"] = lmis["item_code"].astype("string").str.strip()
+lmis["district"] = lmis["district"].astype("string").str.strip().str.replace(r"\s+", "_", regex=True)
+lmis["Facility_Level"] = lmis["Facility_Level"].astype("string").str.strip()
+
 # Keep only those facilities whose location is available
 lmis = lmis[lmis.lat.notna()]
 # TODO assume something else about location of these facilities with missing location - eg. centroid of district?
@@ -1008,7 +1023,7 @@ def redistribute_pooling_lp(
         mask_rows_pos = sel & out[facility_col].isin(facs_pos)
         out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values
 
-    # Facilities with AMC < eps -> OB' = 0
+        # Facilities with AMC < eps -> OB' = 0
         # (this matches the "donate to pool" assumption)
         mask_rows_zero = sel & ~out[facility_col].isin(facs_pos)
         out.loc[mask_rows_zero, "OB_prime"] = 0.0
@@ -1102,8 +1117,20 @@ def redistribute_pooling_lp(
     )
     viol_ub = out.loc[mask_solved, "OB_prime"].values > (ub + tol)
 
-    assert not viol_lb.any(), "viol_lb violated"
-    assert not viol_ub.any(), "viol_ub violated"
+    #assert not viol_lb.any(), "viol_lb violated"
+    #assert not viol_ub.any(), "viol_ub violated"
+    temp = out[mask_solved]
+    if viol_lb.any():
+        print("For the following rows (facility, item and month combinations), it is unclear why OB_prime < tau_donor_keep * AMC, "
+              "which violates a constraint in the LPP")
+        # TODO see if this problem needs to be resolved
+        print(temp[viol_lb][['Facility_Level', 'amc', 'OB', 'OB_prime']])
+
+    if viol_ub.any():
+        print("For the following rows (facility, item and month combinations), it is unclear why OB_prime > tau_max * AMC, "
+              "which violates a constraint in the LPP")
+        # TODO see if this problem needs to be resolved
+        print(temp[viol_ub][['Facility_Level', 'amc', 'OB', 'OB_prime']])
 
     if return_move_log:
         move_log = pd.DataFrame(move_rows)
@@ -1200,13 +1227,14 @@ def run_redistribution_scenarios(
 #     max_chunk=50)
 
 # Store dictionary in pickle format
-#with open(outputfilepath / "T_car.pkl", "wb") as f:
+#with open(outputfilepath / "T_car2.pkl", "wb") as f:
 #    pickle.dump(T_car, f)
 # -> Commented out because it takes long to run. 
The result has been stored in pickle format # Load pre-generated dictionary -with open(outputfilepath / "T_car.pkl", "rb") as f: +with open(outputfilepath / "T_car2.pkl", "rb") as f: T_car = pickle.load(f) +# T_car2 was created after cleaning fac names and getting rid of spaces in the text #edges_flat = build_edges_within_radius_flat(T_car, max_minutes= 60) @@ -1276,17 +1304,19 @@ def run_redistribution_scenarios( # b) Run optimisation at district level pooled_district_df, cluster_district_moves = redistribute_pooling_lp( - df=lmis_test, # the LMIS dataframe - tau_min=0.25, tau_max=3.0, + df=lmis, # the LMIS dataframe + tau_min=0, tau_max=4.0, tau_donor_keep = 3.0, pooling_level="district", cluster_map=None, return_move_log=True, - enforce_no_harm=True, + enforce_no_harm=False, floor_to_baseline=True ) print(pooled_district_df.groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean()) -pooled_district_df.to_csv(outputfilepath/ 'clustering_district_df.csv', index=False) +pooled_district_df[['district', 'item_code', 'fac_name', 'month', 'amc', 'available_prop', 'Facility_Level', + 'OB', 'OB_prime', 'available_prop_redis_raw', 'available_prop_redis', 'received_from_pool', + 'skip_flag']].to_csv(outputfilepath/ 'clustering_district_df.csv', index=False) tlo_pooled_district = ( pooled_district_df .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False) @@ -1298,7 +1328,7 @@ def run_redistribution_scenarios( # c) Run optimisation at cluster (size = 3) level pooled_cluster_df, cluster_moves = redistribute_pooling_lp( df=lmis, # the LMIS dataframe - tau_min=0.25, tau_max=3.0, + tau_min=0, tau_max=4.0, tau_donor_keep = 3.0, pooling_level="cluster", cluster_map=cluster_series, @@ -1310,12 +1340,13 @@ def run_redistribution_scenarios( pooled_cluster_df.to_csv(outputfilepath/ 'clustering_n3_df.csv', index=False) tlo_pooled_cluster = ( - pooled_district_df + pooled_cluster_df .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False) .agg(available_prop_scen17=("available_prop_redis", "mean")) .sort_values(["item_code","district","Facility_Level","month"]) ) +tlo_pooled = tlo_pooled_district.merge(tlo_pooled_cluster, on=["item_code", "district", "Facility_Level", "month"], how="left", validate="one_to_one") # 3) Implement pairwise redistribution # 2) Run Scenario 1 (1-hour radius) From c090cb65ddec8d8be213bc77b0c182c9bfb74c34 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 22 Oct 2025 18:43:34 +0100 Subject: [PATCH 14/16] Interpolate values of AMC and Closing Balance when missing or 0 --- ...ate_consumable_redistribution_scenarios.py | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index e5300cfd16..4a057748b6 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -102,8 +102,60 @@ def _mode_or_first(s: pd.Series): lmis["district"] = lmis["district"].astype("string").str.strip().str.replace(r"\s+", "_", regex=True) lmis["Facility_Level"] = lmis["Facility_Level"].astype("string").str.strip() +# 3) 
Ensure numeric types (quietly coerce bad strings to NaN) +lmis["amc"] = pd.to_numeric(lmis["amc"], errors="coerce") +lmis["closing_bal"] = pd.to_numeric(lmis["closing_bal"], errors="coerce") + # Keep only those facilities whose location is available lmis = lmis[lmis.lat.notna()] + +lmis[(lmis.amc ==0)|(lmis.amc.isna())].to_csv(outputfilepath / 'exploring_amc_0.csv', index=False) + +# --- 1) Replace amc==0 with mean(amc>0) per (fac_name, item_code) --- +# Mark AMC bad-before +amc_bad_before = lmis["amc"].isna() | (lmis["amc"] == 0) + +# Fix AMC: fill zeros/NaNs with mean of positive AMC within (fac_name, item_code) +def _fill_amc_pos_mean(s: pd.Series) -> pd.Series: + m = s[s > 0].mean() # mean over positive AMC only + if pd.isna(m): + return s # no positive months -> leave as-is + return s.where(~(s.isna() | (s == 0)), m) + +amc_before_zeros_and_nan = ((lmis["amc"] == 0)|(lmis['amc'].isna())).sum() +lmis['amc_previously_zero_or_nan'] = ((lmis["amc"] == 0)|(lmis['amc'].isna())) +lmis["amc"] = ( + lmis.groupby(["fac_name", "item_code"], group_keys=False)["amc"] + .transform(_fill_amc_pos_mean) +) +amc_after_zeros_and_nan = ((lmis["amc"] == 0)|(lmis['amc'].isna())).sum() +print(f"AMC zeroes and nan before: {amc_before_zeros_and_nan:,} | after: {amc_after_zeros_and_nan:,}") + +# Rows where AMC was bad and is now fixed (>0) +amc_fixed_now = amc_bad_before & lmis["amc"].notna() & (lmis["amc"] > 0) + +# Prepare group means for closing_bal (non-zero, non-NaN only) +cb_group_mean = ( + lmis.groupby(["fac_name", "item_code"])["closing_bal"] + .transform(lambda s: s.replace(0, np.nan).mean()) + ) + +# Edit closing_bal ONLY where: +# (a) AMC was previously bad and is now fixed, AND +# (b) closing_bal is 0 or NaN, AND +# (c) a group mean exists +cb_bad_now = lmis["closing_bal"].isna() | (lmis["closing_bal"] == 0) +mask_edit = amc_fixed_now & cb_bad_now & cb_group_mean.notna() + +before = (cb_bad_now & amc_fixed_now).sum() + +lmis.loc[mask_edit, "closing_bal"] = cb_group_mean[mask_edit] + +after = ( (lmis["closing_bal"].isna()) | (lmis["closing_bal"] == 0) ) & amc_fixed_now +print(f"closing_bal zeros/NaNs among AMC-fixed rows — before: {before:,} | after: {after.sum():,}") + +lmis[mask_edit].to_csv(outputfilepath / 'exploring_amc_0_after_fix.csv', index=False) + # TODO assume something else about location of these facilities with missing location - eg. centroid of district? # only 16 facilties have missing information @@ -1125,12 +1177,13 @@ def redistribute_pooling_lp( "which violates a constraint in the LPP") # TODO see if this problem needs to be resolved print(temp[viol_lb][['Facility_Level', 'amc', 'OB', 'OB_prime']]) - + temp[viol_lb][['Facility_Level', 'fac_name', 'amc', 'OB', 'OB_prime']].to_csv('violates_lb.csv') if viol_ub.any(): print("For the following rows (facility, item and month combinations), unclear why OB_prime > tau_max * AMC " "which violates a constraint in the LPP") # TODO see if this problem needs to be resolved print(temp[viol_ub][['Facility_Level', 'amc', 'OB', 'OB_prime']]) + temp[viol_ub][['Facility_Level', 'fac_name', 'amc', 'OB', 'OB_prime']].to_csv('violates_ub.csv') if return_move_log: move_log = pd.DataFrame(move_rows) @@ -1303,6 +1356,7 @@ def run_redistribution_scenarios( # cluster_series is a pd.Series: index=facility_id, value like "District A#C00", "District A#C01", ... 
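+# Sense check of the bounds used in the runs below, on an invented facility with AMC = 10:
+# tau_min = 0 sets no transfer floor for receivers; tau_max = 4.0 caps post-pooling stock
+# at 40 units; tau_donor_keep = 3.0 means a donor holding OB = 55 gives away at most 55 - 30 = 25.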
# b) Run optimisation at district level +print("Now running pooled redistribution at District level") pooled_district_df, cluster_district_moves = redistribute_pooling_lp( df=lmis, # the LMIS dataframe tau_min=0, tau_max=4.0, @@ -1326,6 +1380,7 @@ def run_redistribution_scenarios( # c) Run optimisation at cluster (size = 3) level +print("Now running pooled redistribution at Cluster (Size = 3) level") pooled_cluster_df, cluster_moves = redistribute_pooling_lp( df=lmis, # the LMIS dataframe tau_min=0, tau_max=4.0, From a0487be30ec615cdd848b5ead16a4e90d4142470 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 23 Oct 2025 17:18:13 +0100 Subject: [PATCH 15/16] update how stock in/adequacy plots are generated (exclude bad AMC/OB rows) --- .../create_consumable_redistribution_scenarios.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index 4a057748b6..f21ebb3acd 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -214,6 +214,21 @@ def generate_stock_adequacy_heatmap( amc=("amc", "sum")) .reset_index() ) + # Identify groups (fac_name, item_code) that have any non-zero amc + nonzero_groups = agg.loc[agg["amc"] != 0, ["fac_name", "item_code"]].drop_duplicates() + + # Keep: + # - 1. all rows where amc != 0 + # - 2. rows where the (fac_name, item_code) pair never had any non-zero amc + # (because this would indicate that their AMC may in fact be zero) + # - 3. 
rows where both Closing balance and AMC are not zero + agg = agg[ + (agg["amc"] != 0) + | (~agg[["fac_name", "item_code"]] + .apply(tuple, axis=1) + .isin(nonzero_groups.apply(tuple, axis=1))) + ] + agg = agg[~((agg["amc"] == 0) & (agg["opening_bal"] == 0))] # ---- 3) Adequacy indicator per (month, district, item_code) ---- if include_missing_as_fail: From 07cfee10ae05704245e1b8685f4e9ff514fcd40a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 24 Oct 2025 12:03:57 +0100 Subject: [PATCH 16/16] drop seemingly incorrect rows from LMIS data at pre-processing --- ...ate_consumable_redistribution_scenarios.py | 119 +++++++----------- 1 file changed, 45 insertions(+), 74 deletions(-) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py index f21ebb3acd..cc37c26629 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -107,66 +107,13 @@ def _mode_or_first(s: pd.Series): lmis["closing_bal"] = pd.to_numeric(lmis["closing_bal"], errors="coerce") # Keep only those facilities whose location is available +old_facility_count = lmis.fac_name.nunique() lmis = lmis[lmis.lat.notna()] +new_facility_count = lmis.fac_name.nunique() +print(f"{old_facility_count - new_facility_count} facilities out of {old_facility_count} in the lmis data dropped due to " + f"missing location information") -lmis[(lmis.amc ==0)|(lmis.amc.isna())].to_csv(outputfilepath / 'exploring_amc_0.csv', index=False) - -# --- 1) Replace amc==0 with mean(amc>0) per (fac_name, item_code) --- -# Mark AMC bad-before -amc_bad_before = lmis["amc"].isna() | (lmis["amc"] == 0) - -# Fix AMC: fill zeros/NaNs with mean of positive AMC within (fac_name, item_code) -def _fill_amc_pos_mean(s: pd.Series) -> pd.Series: - m = s[s > 0].mean() # mean over positive AMC only - if pd.isna(m): - return s # no positive months -> leave as-is - return s.where(~(s.isna() | (s == 0)), m) - -amc_before_zeros_and_nan = ((lmis["amc"] == 0)|(lmis['amc'].isna())).sum() -lmis['amc_previously_zero_or_nan'] = ((lmis["amc"] == 0)|(lmis['amc'].isna())) -lmis["amc"] = ( - lmis.groupby(["fac_name", "item_code"], group_keys=False)["amc"] - .transform(_fill_amc_pos_mean) -) -amc_after_zeros_and_nan = ((lmis["amc"] == 0)|(lmis['amc'].isna())).sum() -print(f"AMC zeroes and nan before: {amc_before_zeros_and_nan:,} | after: {amc_after_zeros_and_nan:,}") - -# Rows where AMC was bad and is now fixed (>0) -amc_fixed_now = amc_bad_before & lmis["amc"].notna() & (lmis["amc"] > 0) - -# Prepare group means for closing_bal (non-zero, non-NaN only) -cb_group_mean = ( - lmis.groupby(["fac_name", "item_code"])["closing_bal"] - .transform(lambda s: s.replace(0, np.nan).mean()) - ) - -# Edit closing_bal ONLY where: -# (a) AMC was previously bad and is now fixed, AND -# (b) closing_bal is 0 or NaN, AND -# (c) a group mean exists -cb_bad_now = lmis["closing_bal"].isna() | (lmis["closing_bal"] == 0) -mask_edit = amc_fixed_now & cb_bad_now & cb_group_mean.notna() - -before = (cb_bad_now & amc_fixed_now).sum() - -lmis.loc[mask_edit, "closing_bal"] = cb_group_mean[mask_edit] - -after = ( (lmis["closing_bal"].isna()) | 
(lmis["closing_bal"] == 0) ) & amc_fixed_now
-print(f"closing_bal zeros/NaNs among AMC-fixed rows — before: {before:,} | after: {after.sum():,}")
-
-lmis[mask_edit].to_csv(outputfilepath / 'exploring_amc_0_after_fix.csv', index=False)
-
-# TODO assume something else about location of these facilities with missing location - eg. centroid of district?
-# only 16 facilties have missing information
-
-# -----------------------------------
-# 1) Data exploration
-# -----------------------------------
+# Explore missingness
 def compute_opening_balance(df: pd.DataFrame) -> pd.Series:
     """
     Opening balance from same-month records:
      OB_t = closing_bal_t - received_t + dispensed_t
     """
     return df["closing_bal"] - df["received"] + df["dispensed"]
 
+# Drop the rows which seem suspicious
+# - 1. Rows with AMC and OB = 0 (these would be excluded from the redistribution algorithm anyway)
+# - 2. Rows where the (fac_name, item_code) pair only reported 0 AMC throughout the year
+lmis.reset_index(inplace=True, drop=True)
+lmis['opening_bal'] = compute_opening_balance(lmis)
+cb0 = (lmis.closing_bal == 0)|(lmis.closing_bal.isna())
+ob0 = (lmis.opening_bal == 0)|(lmis.opening_bal.isna())
+amc0 = (lmis.amc == 0)|(lmis.amc.isna())
+d0 = (lmis.dispensed == 0)|(lmis.dispensed.isna())
+r0 = (lmis.received == 0)|(lmis.received.isna())
+prob0 = (lmis.available_prop == 0)|(lmis.available_prop.isna())
+non_zero_amc_groups = lmis.loc[~amc0, ["fac_name", "item_code"]].drop_duplicates()
+non0_amc = lmis[['fac_name', 'item_code']].apply(tuple, axis=1).isin(non_zero_amc_groups.apply(tuple, axis=1))
+non_zero_ob_groups = lmis.loc[~ob0, ["fac_name", "item_code"]].drop_duplicates()
+non0_ob = lmis[['fac_name', 'item_code']].apply(tuple, axis=1).isin(non_zero_ob_groups.apply(tuple, axis=1))
+
+print(f"{(amc0 & non0_amc).sum()} rows ({(amc0 & non0_amc).sum()/len(lmis) * 100:.2f}%) where AMC is zero even though the same facility recorded a non-zero AMC for a specific item "
+      f"for a different month in the year")
+print(f"Among these, {(amc0 & cb0 & non0_amc).sum()} rows ({(amc0 & cb0 & non0_amc).sum()/len(lmis) * 100:.2f}% of "
+      f"total) also recorded CB as zero even though CB was non-zero for a different month")
+print(f"Among these, {(amc0 & cb0 & non0_amc & ~prob0).sum()} rows "
+      f"({(amc0 & cb0 & non0_amc & ~prob0).sum()/len(lmis) * 100:.2f}% of "
+      f"total) have been recorded as having a non-zero probability of drug availability based on data from other months.")
+
+old_row_count = len(lmis)
+drop_mask = (amc0 & ob0) | (~non0_amc)
+lmis = lmis[~drop_mask]
+new_row_count = len(lmis)
+
+print(f"{old_row_count - new_row_count} ({(old_row_count - new_row_count)/old_row_count * 100:.2f}%) "
+      f"rows out of {old_row_count} in the lmis data dropped due to seemingly incorrect entries")
+lmis.reset_index(inplace=True, drop=True)
+
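+# Toy illustration of drop_mask with invented rows: for fac_A/item_1 with monthly
+# (amc, opening_bal) = (0, 0), (0, 25), (12, 30), the first row is dropped by rule 1
+# (AMC and OB both zero) while the other two are kept because the pair reports a
+# non-zero AMC at least once; fac_B/item_2 reporting amc = 0 in every month is
+# dropped entirely by rule 2, regardless of its balances.
+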
+# TODO assume something else about location of these facilities with missing location - eg. centroid of district?
+# only 16 facilities have missing information
+
+# -----------------------------------
+# 1) Data exploration
+# -----------------------------------
 def generate_stock_adequacy_heatmap(
     df: pd.DataFrame,
     figures_path: Path = Path("figures"),
@@ -214,21 +200,6 @@ def generate_stock_adequacy_heatmap(
             amc=("amc", "sum"))
         .reset_index()
     )
-    # Identify groups (fac_name, item_code) that have any non-zero amc
-    nonzero_groups = agg.loc[agg["amc"] != 0, ["fac_name", "item_code"]].drop_duplicates()
-
-    # Keep:
-    # - 1. all rows where amc != 0
-    # - 2. rows where the (fac_name, item_code) pair never had any non-zero amc
-    #      (because this would indicate that their AMC may in fact be zero)
-    # - 3. rows where both Closing balance and AMC are not zero
-    agg = agg[
-        (agg["amc"] != 0)
-        | (~agg[["fac_name", "item_code"]]
-           .apply(tuple, axis=1)
-           .isin(nonzero_groups.apply(tuple, axis=1)))
-    ]
-    agg = agg[~((agg["amc"] == 0) & (agg["opening_bal"] == 0))]
 
     # ---- 3) Adequacy indicator per (month, district, item_code) ----
     if include_missing_as_fail:
@@ -307,7 +278,7 @@ def generate_stock_adequacy_heatmap(
         cmap=cmap,
         cbar_kws={"label": value_label},
         ax=ax,
-        annot=True, annot_kws={"size": 10},
+        annot=annotation, annot_kws={"size": 10},
         vmin = 0, vmax = 100)
 
     ax.set_xlabel("Month")