From b092fd730f1b95bfab02ec76955add91e4e7695b Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Wed, 26 Nov 2025 09:41:21 -0600 Subject: [PATCH 1/6] Checkpoint from VS Code for coding agent session --- seals/seals_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/seals/seals_utils.py b/seals/seals_utils.py index 44ca65b..c0984a0 100644 --- a/seals/seals_utils.py +++ b/seals/seals_utils.py @@ -1529,6 +1529,8 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan ### Define the allocation of the total to individual cells + # TODO If the user provides a regional_change_classes_path that has each year as a column, instead of the current format which has + # Creates a dict for each zone_id: to_allocate, which will be reclassified onto the zone ids. for year_c, year in enumerate(years): allocate_per_zone_dict = {} @@ -1539,6 +1541,7 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan hb.log('Processing ' + column + ' for ' + scenario_label + ', writing to ' + output_path) regions_column_id = regions_column_id.replace('label', 'id') + # TODO Right here, we see that the merged dataframes have years in the columns (instead of a single column with year as a variable). But we actually want landuse class in the columns. Pivot this table so it works. for i, change in merged[column].items(): zone_id = int(merged[regions_column_id][i]) From ca8b299299f5a4f2dcb03ddc63fc7061711d41b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:46:20 +0000 Subject: [PATCH 2/6] Implement support for year-based CSV format in convert_regional_change_to_coarse Co-authored-by: jandrewjohnson <23742350+jandrewjohnson@users.noreply.github.com> --- seals/seals_utils.py | 145 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 30 deletions(-) diff --git a/seals/seals_utils.py b/seals/seals_utils.py index c0984a0..d662097 100644 --- a/seals/seals_utils.py +++ b/seals/seals_utils.py @@ -1476,13 +1476,16 @@ def load_blocks_list(p, input_dir): def convert_regional_change_to_coarse(regional_change_vector_path, regional_change_classes_path, coarse_ha_per_cell_path, scenario_label, output_dir, output_filename_end, columns_to_process, regions_column_label, years, region_ids_raster_path=None, distribution_algorithm='proportional', coarse_change_raster_path=None): # Converts a regional vector change map to a coarse gridded representation. - # - Regiona_change_vector_path is a gpkg with the boundaries of the regions that are changing. + # - regional_change_vector_path is a gpkg with the boundaries of the regions that are changing. # - regional_change_classes_path is a csv with columns for each changing landuse class (in columns to process) that reports net change in hectares per polygon. - # - Coarse_ha_per_cell_path is a raster that reports the number of hectares per cell in the coarse grid. Necessary to work with unprojected data, which is our assumed form. - # - Output path is the path to save the output raster. - # - Columns to process is a list of the columns headers in the gpkg that represent net changes in LU classes. + # This CSV can be in two formats: + # 1. Class-based format (original): columns are land-use classes (e.g., 'cropland', 'forest'), rows are regions + # 2. 
Year-based format (new): columns are years (e.g., '2030', '2050'), rows are regions + class combinations with a 'class' column + # - coarse_ha_per_cell_path is a raster that reports the number of hectares per cell in the coarse grid. Necessary to work with unprojected data, which is our assumed form. + # - output_dir is the directory where output rasters will be saved. + # - columns_to_process is a list of the column headers that represent net changes in LU classes. # - distribution_algorithm is a string that indicates how to distribute the change across the cells. default is "proportional". - # - Coarse_change_raster_path is an optional path that, if provided, will be combined with the regional change to produce a combined change raster. + # - coarse_change_raster_path is an optional path that, if provided, will be combined with the regional change to produce a combined change raster. # Read both inputs regional_change_vector = gpd.read_file(regional_change_vector_path) @@ -1529,40 +1532,122 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan ### Define the allocation of the total to individual cells - # TODO If the user provides a regional_change_classes_path that has each year as a column, instead of the current format which has + # Detect if the CSV has years as columns (year-based format) or classes as columns (class-based format) + # Year-based format: columns are years (e.g., '2030', '2050'), rows have region + class + # Class-based format: columns are classes (e.g., 'cropland', 'forest'), rows have region only + + # Check if any of the columns_to_process appear in the merged dataframe columns + # If not, we assume it's a year-based format that needs pivoting + has_class_columns = any(col in merged.columns for col in columns_to_process) + + # Check if any year appears as a column (as string or int) + year_columns = [col for col in merged.columns if str(col).replace('.', '').replace('-', '').isdigit()] + has_year_columns = len(year_columns) > 0 + + if not has_class_columns and has_year_columns: + # Year-based format detected - need to pivot the data + # Expected format: region_id/label, class_label, year1, year2, ..., yearN + # We need to transform it to: region_id/label, class1, class2, ..., classN (for each year) + + # Find the class column name (usually 'class', 'lulc_class', 'class_label', etc.) + class_column_candidates = ['class', 'lulc_class', 'class_label', 'land_class', 'landuse_class'] + class_column = None + for candidate in class_column_candidates: + if candidate in merged.columns: + class_column = candidate + break + + if class_column is None: + # Try to find any column that might contain class information + for col in merged.columns: + if 'class' in col.lower() and col != regions_column_label: + class_column = col + break + + if class_column is None: + raise NameError('Could not detect class column in year-based format CSV. Expected columns like "class", "lulc_class", or "class_label".') + + hb.log('Detected year-based format in CSV. Pivoting data from class column: ' + class_column) # Creates a dict for each zone_id: to_allocate, which will be reclassified onto the zone ids. 
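For intuition, the detection heuristic added above can be exercised on a toy table. This is a minimal sketch, not SEALS code: the frame, the 'nuts_label' and 'class' column names, and the values are illustrative, and columns_to_process stands in for the function argument of the same name.

    import pandas as pd

    # Toy year-based table: one row per region + class combination, one column per year.
    merged = pd.DataFrame({
        'nuts_label': ['CZ01', 'CZ01', 'CZ02', 'CZ02'],
        'class': ['cropland', 'forest', 'cropland', 'forest'],
        '2030': [10.0, -10.0, 5.0, -5.0],
        '2050': [20.0, -20.0, 8.0, -8.0],
    })
    columns_to_process = ['cropland', 'forest']

    # Same tests as the hunk above: none of the expected classes appear as
    # columns, but numeric-looking column names do, which signals the
    # year-based format that needs pivoting.
    has_class_columns = any(col in merged.columns for col in columns_to_process)
    year_columns = [col for col in merged.columns
                    if str(col).replace('.', '').replace('-', '').isdigit()]
    print(has_class_columns, year_columns)  # -> False ['2030', '2050']
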
for year_c, year in enumerate(years): allocate_per_zone_dict = {} - for column in columns_to_process: - output_path = os.path.join(output_dir, column + output_filename_end) + + # If year-based format, we need to work with data for this specific year + if not has_class_columns and has_year_columns: + # Find the column for this year (could be string or int) + year_col = None + for col in year_columns: + if str(col) == str(year) or int(col) == int(year): + year_col = col + break + + if year_col is None: + hb.log('Warning: Year ' + str(year) + ' not found in CSV columns. Available years: ' + str(year_columns)) + continue - if not hb.path_exists(output_path): - hb.log('Processing ' + column + ' for ' + scenario_label + ', writing to ' + output_path) - regions_column_id = regions_column_id.replace('label', 'id') + # For each class we need to process, extract the data from the pivoted structure + for column in columns_to_process: + output_path = os.path.join(output_dir, column + output_filename_end) - # TODO Right here, we see that the merged dataframes have years in the columns (instead of a single column with year as a variable). But we actually want landuse class in the columns. Pivot this table so it works. - - for i, change in merged[column].items(): - zone_id = int(merged[regions_column_id][i]) + if not hb.path_exists(output_path): + hb.log('Processing ' + column + ' for year ' + str(year) + ' in ' + scenario_label + ', writing to ' + output_path) + regions_column_id = regions_column_id.replace('label', 'id') + + # Filter merged dataframe for this specific class + class_data = merged[merged[class_column] == column] - if int(zone_id) in n_cells_per_zone: - n_cells = n_cells_per_zone[int(zone_id)] # BAD HACK, should be generalized to know ahead of time if it's an int or string - else: - n_cells = 0 + # Now iterate over the filtered data to get regional values for this year and class + for idx, row in class_data.iterrows(): + zone_id = int(row[regions_column_id]) + change = row[year_col] + + if int(zone_id) in n_cells_per_zone: + n_cells = n_cells_per_zone[int(zone_id)] + else: + n_cells = 0 + + if n_cells > 0 and change != 0 and not pd.isna(change): + result = change / n_cells + else: + result = 0.0 + + if 'nan' in str(result).lower() or pd.isna(result): + allocate_per_zone_dict[zone_id] = 0.0 + else: + allocate_per_zone_dict[zone_id] = result + + print('Allocate per zone dict for ' + column + ' (year ' + str(year) + '): ' + str(allocate_per_zone_dict)) + hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) + else: + # Original class-based format logic + for column in columns_to_process: + output_path = os.path.join(output_dir, column + output_filename_end) + + if not hb.path_exists(output_path): + hb.log('Processing ' + column + ' for ' + scenario_label + ', writing to ' + output_path) + regions_column_id = regions_column_id.replace('label', 'id') + + for i, change in merged[column].items(): + zone_id = int(merged[regions_column_id][i]) - if n_cells > 0 and change != 0: - result = change / n_cells - else: - result = 0.0 - - if 'nan' in str(result).lower(): - allocate_per_zone_dict[zone_id] = 0.0 - else: - allocate_per_zone_dict[zone_id] = result + if int(zone_id) in n_cells_per_zone: + n_cells = n_cells_per_zone[int(zone_id)] # BAD HACK, should be generalized to know ahead of time if it's an int or string + else: + n_cells = 0 - print('Allocate per zone dict 
for ' + column + ': ' + str(allocate_per_zone_dict)) - hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) + if n_cells > 0 and change != 0: + result = change / n_cells + else: + result = 0.0 + + if 'nan' in str(result).lower(): + allocate_per_zone_dict[zone_id] = 0.0 + else: + allocate_per_zone_dict[zone_id] = result + + print('Allocate per zone dict for ' + column + ': ' + str(allocate_per_zone_dict)) + hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) def combine_coarsified_regional_with_coarse_estimate(coarsified_path, coarse_estimate_path, combination_algorithm, output_path): ### ABANDONED? From d03ec65d506b7e75c447bcf71ad6ea72f9691bf0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:50:38 +0000 Subject: [PATCH 3/6] Address code review feedback: improve error handling and consistency Co-authored-by: jandrewjohnson <23742350+jandrewjohnson@users.noreply.github.com> --- seals/seals_utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/seals/seals_utils.py b/seals/seals_utils.py index d662097..089a57e 100644 --- a/seals/seals_utils.py +++ b/seals/seals_utils.py @@ -1578,9 +1578,15 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan # Find the column for this year (could be string or int) year_col = None for col in year_columns: - if str(col) == str(year) or int(col) == int(year): - year_col = col - break + try: + if str(col) == str(year) or int(col) == int(year): + year_col = col + break + except (ValueError, TypeError): + # Skip columns that can't be converted to int + if str(col) == str(year): + year_col = col + break if year_col is None: hb.log('Warning: Year ' + str(year) + ' not found in CSV columns. 
Available years: ' + str(year_columns))
@@ -1607,17 +1613,17 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan
                     else:
                         n_cells = 0
 
-                    if n_cells > 0 and change != 0 and not pd.isna(change):
+                    if n_cells > 0 and not pd.isna(change) and change != 0:
                         result = change / n_cells
                     else:
                         result = 0.0
 
-                    if 'nan' in str(result).lower() or pd.isna(result):
+                    if pd.isna(result):
                         allocate_per_zone_dict[zone_id] = 0.0
                     else:
                         allocate_per_zone_dict[zone_id] = result
 
-                print('Allocate per zone dict for ' + column + ' (year ' + str(year) + '): ' + str(allocate_per_zone_dict))
+                hb.log('Allocate per zone dict for ' + column + ' (year ' + str(year) + '): ' + str(allocate_per_zone_dict))
                 hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False)
     else:
         # Original class-based format logic
@@ -1641,12 +1647,12 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan
                     else:
                         result = 0.0
 
-                    if 'nan' in str(result).lower():
+                    if pd.isna(result):
                         allocate_per_zone_dict[zone_id] = 0.0
                     else:
                         allocate_per_zone_dict[zone_id] = result
 
-                print('Allocate per zone dict for ' + column + ': ' + str(allocate_per_zone_dict))
+                hb.log('Allocate per zone dict for ' + column + ': ' + str(allocate_per_zone_dict))
                 hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False)
 
 def combine_coarsified_regional_with_coarse_estimate(coarsified_path, coarse_estimate_path, combination_algorithm, output_path):
     ### ABANDONED?

From f3cf2912fe4956bad58765d4842cedb8bb631a35 Mon Sep 17 00:00:00 2001
From: Justin Johnson
Date: Wed, 26 Nov 2025 10:27:48 -0600
Subject: [PATCH 4/6] Revert "Merge pull request #10 from
 jandrewjohnson/copilot/swift-bee"

This reverts commit f9f30301af777bbf2296079a7a7134642600ed6a, reversing
changes made to fce2da7a8a248a22b3604ac6359816557aad8efd.

---
 seals/seals_utils.py | 150 ++++++++----------------------------------
 1 file changed, 28 insertions(+), 122 deletions(-)

diff --git a/seals/seals_utils.py b/seals/seals_utils.py
index 089a57e..44ca65b 100644
--- a/seals/seals_utils.py
+++ b/seals/seals_utils.py
@@ -1476,16 +1476,13 @@ def load_blocks_list(p, input_dir):
 
 def convert_regional_change_to_coarse(regional_change_vector_path, regional_change_classes_path, coarse_ha_per_cell_path, scenario_label, output_dir, output_filename_end, columns_to_process, regions_column_label, years, region_ids_raster_path=None, distribution_algorithm='proportional', coarse_change_raster_path=None):
     # Converts a regional vector change map to a coarse gridded representation.
-    # - regional_change_vector_path is a gpkg with the boundaries of the regions that are changing.
+    # - Regiona_change_vector_path is a gpkg with the boundaries of the regions that are changing.
     # - regional_change_classes_path is a csv with columns for each changing landuse class (in columns to process) that reports net change in hectares per polygon.
-    # This CSV can be in two formats:
-    # 1. Class-based format (original): columns are land-use classes (e.g., 'cropland', 'forest'), rows are regions
-    # 2. 
Year-based format (new): columns are years (e.g., '2030', '2050'), rows are regions + class combinations with a 'class' column - # - coarse_ha_per_cell_path is a raster that reports the number of hectares per cell in the coarse grid. Necessary to work with unprojected data, which is our assumed form. - # - output_dir is the directory where output rasters will be saved. - # - columns_to_process is a list of the column headers that represent net changes in LU classes. + # - Coarse_ha_per_cell_path is a raster that reports the number of hectares per cell in the coarse grid. Necessary to work with unprojected data, which is our assumed form. + # - Output path is the path to save the output raster. + # - Columns to process is a list of the columns headers in the gpkg that represent net changes in LU classes. # - distribution_algorithm is a string that indicates how to distribute the change across the cells. default is "proportional". - # - coarse_change_raster_path is an optional path that, if provided, will be combined with the regional change to produce a combined change raster. + # - Coarse_change_raster_path is an optional path that, if provided, will be combined with the regional change to produce a combined change raster. # Read both inputs regional_change_vector = gpd.read_file(regional_change_vector_path) @@ -1532,128 +1529,37 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan ### Define the allocation of the total to individual cells - # Detect if the CSV has years as columns (year-based format) or classes as columns (class-based format) - # Year-based format: columns are years (e.g., '2030', '2050'), rows have region + class - # Class-based format: columns are classes (e.g., 'cropland', 'forest'), rows have region only - - # Check if any of the columns_to_process appear in the merged dataframe columns - # If not, we assume it's a year-based format that needs pivoting - has_class_columns = any(col in merged.columns for col in columns_to_process) - - # Check if any year appears as a column (as string or int) - year_columns = [col for col in merged.columns if str(col).replace('.', '').replace('-', '').isdigit()] - has_year_columns = len(year_columns) > 0 - - if not has_class_columns and has_year_columns: - # Year-based format detected - need to pivot the data - # Expected format: region_id/label, class_label, year1, year2, ..., yearN - # We need to transform it to: region_id/label, class1, class2, ..., classN (for each year) - - # Find the class column name (usually 'class', 'lulc_class', 'class_label', etc.) - class_column_candidates = ['class', 'lulc_class', 'class_label', 'land_class', 'landuse_class'] - class_column = None - for candidate in class_column_candidates: - if candidate in merged.columns: - class_column = candidate - break - - if class_column is None: - # Try to find any column that might contain class information - for col in merged.columns: - if 'class' in col.lower() and col != regions_column_label: - class_column = col - break - - if class_column is None: - raise NameError('Could not detect class column in year-based format CSV. Expected columns like "class", "lulc_class", or "class_label".') - - hb.log('Detected year-based format in CSV. Pivoting data from class column: ' + class_column) - # Creates a dict for each zone_id: to_allocate, which will be reclassified onto the zone ids. 
for year_c, year in enumerate(years): allocate_per_zone_dict = {} - - # If year-based format, we need to work with data for this specific year - if not has_class_columns and has_year_columns: - # Find the column for this year (could be string or int) - year_col = None - for col in year_columns: - try: - if str(col) == str(year) or int(col) == int(year): - year_col = col - break - except (ValueError, TypeError): - # Skip columns that can't be converted to int - if str(col) == str(year): - year_col = col - break - - if year_col is None: - hb.log('Warning: Year ' + str(year) + ' not found in CSV columns. Available years: ' + str(year_columns)) - continue + for column in columns_to_process: + output_path = os.path.join(output_dir, column + output_filename_end) - # For each class we need to process, extract the data from the pivoted structure - for column in columns_to_process: - output_path = os.path.join(output_dir, column + output_filename_end) + if not hb.path_exists(output_path): + hb.log('Processing ' + column + ' for ' + scenario_label + ', writing to ' + output_path) + regions_column_id = regions_column_id.replace('label', 'id') - if not hb.path_exists(output_path): - hb.log('Processing ' + column + ' for year ' + str(year) + ' in ' + scenario_label + ', writing to ' + output_path) - regions_column_id = regions_column_id.replace('label', 'id') - - # Filter merged dataframe for this specific class - class_data = merged[merged[class_column] == column] - - # Now iterate over the filtered data to get regional values for this year and class - for idx, row in class_data.iterrows(): - zone_id = int(row[regions_column_id]) - change = row[year_col] - - if int(zone_id) in n_cells_per_zone: - n_cells = n_cells_per_zone[int(zone_id)] - else: - n_cells = 0 - - if n_cells > 0 and not pd.isna(change) and change != 0: - result = change / n_cells - else: - result = 0.0 - - if pd.isna(result): - allocate_per_zone_dict[zone_id] = 0.0 - else: - allocate_per_zone_dict[zone_id] = result - - hb.log('Allocate per zone dict for ' + column + ' (year ' + str(year) + '): ' + str(allocate_per_zone_dict)) - hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) - else: - # Original class-based format logic - for column in columns_to_process: - output_path = os.path.join(output_dir, column + output_filename_end) - - if not hb.path_exists(output_path): - hb.log('Processing ' + column + ' for ' + scenario_label + ', writing to ' + output_path) - regions_column_id = regions_column_id.replace('label', 'id') - for i, change in merged[column].items(): - zone_id = int(merged[regions_column_id][i]) + for i, change in merged[column].items(): + zone_id = int(merged[regions_column_id][i]) + + if int(zone_id) in n_cells_per_zone: + n_cells = n_cells_per_zone[int(zone_id)] # BAD HACK, should be generalized to know ahead of time if it's an int or string + else: + n_cells = 0 - if int(zone_id) in n_cells_per_zone: - n_cells = n_cells_per_zone[int(zone_id)] # BAD HACK, should be generalized to know ahead of time if it's an int or string - else: - n_cells = 0 + if n_cells > 0 and change != 0: + result = change / n_cells + else: + result = 0.0 + + if 'nan' in str(result).lower(): + allocate_per_zone_dict[zone_id] = 0.0 + else: + allocate_per_zone_dict[zone_id] = result - if n_cells > 0 and change != 0: - result = change / n_cells - else: - result = 0.0 - - if pd.isna(result): - 
allocate_per_zone_dict[zone_id] = 0.0 - else: - allocate_per_zone_dict[zone_id] = result - - hb.log('Allocate per zone dict for ' + column + ': ' + str(allocate_per_zone_dict)) - hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) + print('Allocate per zone dict for ' + column + ': ' + str(allocate_per_zone_dict)) + hb.reclassify_raster_hb(region_ids_raster_path, allocate_per_zone_dict, output_path, output_data_type=7, match_path=None, invoke_full_callback=False, existing_values='zero', verbose=False) def combine_coarsified_regional_with_coarse_estimate(coarsified_path, coarse_estimate_path, combination_algorithm, output_path): ### ABANDONED? From 42626aac3bfa286ba28394a27ffc7b38f58de4d4 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Wed, 26 Nov 2025 15:01:47 -0600 Subject: [PATCH 5/6] Enhance regional change functionality: add support for input override path and improve AOI handling in project_aoi --- seals/run_seals_bonn.py | 149 +++++++++++++++++++++++ seals/seals_process_coarse_timeseries.py | 4 + seals/seals_tasks.py | 21 +++- 3 files changed, 169 insertions(+), 5 deletions(-) create mode 100644 seals/run_seals_bonn.py diff --git a/seals/run_seals_bonn.py b/seals/run_seals_bonn.py new file mode 100644 index 0000000..a5d6e66 --- /dev/null +++ b/seals/run_seals_bonn.py @@ -0,0 +1,149 @@ +import os +import sys + +import hazelbean as hb +import pandas as pd + +from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_visualization_tasks + + + +def convert_cgebox_output_to_seals_regional_projections_input(p): + + input_path = p.regional_projections_input_path + output_path = os.path.join(p.cur_dir, 'regional_projections_input_pivoted.csv') + p.regional_projections_input_override_path = output_path + + if p.run_this: + + if not hb.path_exists(output_path): + df = hb.df_read(input_path) + + # Step 1: Melt the DataFrame to convert year columns into rows. + + # Get the list of columns to unpivot (years) + years_to_unpivot = [col for col in df.columns if col.isdigit()] + # years_to_unpivot = [] + melted = df.melt( + id_vars=[p.regions_column_label, 'LandCover'], # Assumes the land cover column is named 'LandCover' + value_vars=years_to_unpivot, + var_name='year', + value_name='value' + ) + + # Step 2: Pivot the melted DataFrame. + # We set the region and year as the new index, and create new columns from 'LandCover' categories. 
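+            # The pivoted result has one row per (region, year) pair and one
+            # numeric column per LandCover category, i.e. the class-based layout
+            # that convert_regional_change_to_coarse already understands.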
+ merged_pivoted = melted.pivot_table( + index=[p.regions_column_label, 'year'], + columns='LandCover', + values='value' + ).reset_index() + + + # Now add nuts_id + merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label]) + + # Write a new file in the task dir and reassign the project attribute to the new csv + hb.df_write(merged_pivoted, output_path) + + +def build_bonn_task_tree(p): + + # Define the project AOI + p.project_aoi_task = p.add_task(seals_tasks.project_aoi) + p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input) + + + ##### FINE PROCESSED INPUTS ##### + p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs) + p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False) + + ##### COARSE CHANGE ##### + p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0) + p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0) + p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0) + + ##### REGIONAL + p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change) + + ##### ALLOCATION ##### + p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0) + p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0) + p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0) + + ##### STITCH ZONES ##### + p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios) + + ##### VIZUALIZE EXISTING DATA ##### + p.visualization_task = p.add_task(seals_visualization_tasks.visualization) + p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task) + + + + +main = '' +if __name__ == '__main__': + + ### ------- ENVIRONMENT SETTINGS ------------------------------- + # Users should only need to edit lines in this section + + # Create a ProjectFlow Object to organize directories and enable parallel processing. + p = hb.ProjectFlow() + + # Assign project-level attributes to the p object (such as in p.base_data_dir = ... below) + # including where the project_dir and base_data are located. 
+ # The project_name is used to name the project directory below. If the directory exists, each task will not recreate + # files that already exist. + p.user_dir = os.path.expanduser('~') + p.extra_dirs = ['Files', 'seals', 'projects'] + p.project_name = 'bonn_esm_cgebox_seals' + # p.project_name = p.project_name + '_' + hb.pretty_time() # If don't you want to recreate everything each time, comment out this line. + + # Based on the paths above, set the project_dir. All files will be created in this directory. + p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name) + p.set_project_dir(p.project_dir) + + p.run_in_parallel = 1 # Must be set before building the task tree if the task tree has parralel iterator tasks. + + # Build the task tree via a building function and assign it to p. IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION + build_bonn_task_tree(p) + + # Set the base data dir. The model will check here to see if it has everything it needs to run. + # If anything is missing, it will download it. You can use the same base_data dir across multiple projects. + # Additionally, if you're clever, you can move files generated in your tasks to the right base_data_dir + # directory so that they are available for future projects and avoids redundant processing. + # The final directory has to be named base_data to match the naming convention on the google cloud bucket. + p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data') + + # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a different + # bucket than default, provide the name and credentials here. Otherwise uses default public data 'gtap_invest_seals_2023_04_21'. + p.data_credentials_path = None + p.input_bucket_name = None + + ## Set defaults and generate the scenario_definitions.csv if it doesn't exist. + # SEALS will run based on the scenarios defined in a scenario_definitions.csv + # If you have not run SEALS before, SEALS will generate it in your project's input_dir. + # A useful way to get started is to to run SEALS on the test data without modification + # and then edit the scenario_definitions.csv to your project needs. + p.scenario_definitions_filename = 'standard_scenarios.csv' + p.scenario_definitions_path = os.path.join(p.input_dir, p.scenario_definitions_filename) + seals_initialize_project.initialize_scenario_definitions(p) + + # Set processing resolution: determines how large of a chunk should be processed at a time. 4 deg is about max for 64gb memory systems + p.processing_resolution = 1.0 # In degrees. Must be in pyramid_compatible_resolutions + + seals_initialize_project.set_advanced_options(p) + + p.L = hb.get_logger('test_run_seals') + hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir) + + p.execute() + + result = 'Done!' 
+ diff --git a/seals/seals_process_coarse_timeseries.py b/seals/seals_process_coarse_timeseries.py index 14a0d3d..13ab182 100644 --- a/seals/seals_process_coarse_timeseries.py +++ b/seals/seals_process_coarse_timeseries.py @@ -60,6 +60,10 @@ def regional_change(p): scenario_label = p.scenario_label output_filename_end = '_' + str(year) + '_' + str(previous_year) + '_ha_diff_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_regional_coarsified.tif' regions_column_label = p.regions_column_label + + if getattr(p, 'regional_projections_input_override_path', None): + regional_change_classes_path = p.regional_projections_input_override_path + # TODOO I should have iterated on class label here, then called the util file only on that one seals_utils.convert_regional_change_to_coarse(regional_change_vector_path, regional_change_classes_path, diff --git a/seals/seals_tasks.py b/seals/seals_tasks.py index e1fa90e..a47f0b0 100644 --- a/seals/seals_tasks.py +++ b/seals/seals_tasks.py @@ -35,11 +35,22 @@ def project_aoi(p): if not p.regions_column_label in gdf.columns: raise ValueError(f"The column '{p.regions_column_label}' is not found in the regions vector file: {p.regions_vector_path}. Please check the column name or the vector file.") - p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg') - p.aoi_label = p.aoi - - filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code. - filter_value = p.aoi + + if p.aoi == 'from_regional_projections_input_path': + # Read the csv to get the unique AOI values + df = hb.df_read(p.regional_projections_input_path) + unique_aois = list(df[p.regions_column_label].unique()) + filter_column = p.regions_column_label + filter_value = unique_aois + p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg') + p.aoi_label = p.aoi + else: + + p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg') + p.aoi_label = p.aoi + + filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code. + filter_value = p.aoi for current_aoi_path in hb.list_filtered_paths_nonrecursively(p.cur_dir, include_strings='aoi'): From e5bed421ddd87e1e6f19a7c1e0afd84787a0aa7b Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Fri, 5 Dec 2025 09:23:42 -0600 Subject: [PATCH 6/6] bonn done in devstack but not in release script. waiting for final feedback from bonn team. --- seals/run_seals_bonn.py | 105 +++++++++++++++-------- seals/seals_process_coarse_timeseries.py | 21 +++-- seals/seals_utils.py | 6 +- 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/seals/run_seals_bonn.py b/seals/run_seals_bonn.py index a5d6e66..7dbcf46 100644 --- a/seals/run_seals_bonn.py +++ b/seals/run_seals_bonn.py @@ -4,49 +4,84 @@ import hazelbean as hb import pandas as pd -from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_visualization_tasks +from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_utils, seals_visualization_tasks +# TODO +# 1. make regional projections be the change not the total. +# 2. decide how to handle project_dir in the project repo so it uses the git-cloned inputs. +# 3. 
test the setup.bat and decide if it's dumb to put it in the src (tho with git ignore listed) def convert_cgebox_output_to_seals_regional_projections_input(p): - input_path = p.regional_projections_input_path - output_path = os.path.join(p.cur_dir, 'regional_projections_input_pivoted.csv') - p.regional_projections_input_override_path = output_path + + p.regional_projections_input_override_paths = {} if p.run_this: - - if not hb.path_exists(output_path): - df = hb.df_read(input_path) - - # Step 1: Melt the DataFrame to convert year columns into rows. - - # Get the list of columns to unpivot (years) - years_to_unpivot = [col for col in df.columns if col.isdigit()] - # years_to_unpivot = [] - melted = df.melt( - id_vars=[p.regions_column_label, 'LandCover'], # Assumes the land cover column is named 'LandCover' - value_vars=years_to_unpivot, - var_name='year', - value_name='value' - ) - - # Step 2: Pivot the melted DataFrame. - # We set the region and year as the new index, and create new columns from 'LandCover' categories. - merged_pivoted = melted.pivot_table( - index=[p.regions_column_label, 'year'], - columns='LandCover', - values='value' - ).reset_index() - + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + + if p.scenario_type != 'baseline': + input_path = p.regional_projections_input_path + output_path = os.path.join(p.cur_dir, f'regional_projections_input_pivoted_{p.exogenous_label}.csv') + + # START HERE, the override doesn't work cause it doesn't iterate over years.... use a catear? + p.regional_projections_input_override_paths[p.scenario_label] = output_path + + if not hb.path_exists(output_path): + df = hb.df_read(input_path) + + # Step 1: Melt the DataFrame to convert year columns into rows. + + # Get the list of columns to unpivot (years) + years_to_unpivot = [col for col in df.columns if col.isdigit()] + # years_to_unpivot = [] + melted = df.melt( + id_vars=[p.regions_column_label, 'LandCover'], # Assumes the land cover column is named 'LandCover' + value_vars=years_to_unpivot, + var_name='year', + value_name='value' + ) + + # Step 2: Pivot the melted DataFrame. + # We set the region and year as the new index, and create new columns from 'LandCover' categories. + merged_pivoted = melted.pivot_table( + index=[p.regions_column_label, 'year'], + columns='LandCover', + values='value' + ).reset_index() + + + # Now add nuts_id + merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label]) + merged_pivoted['nuts_id'] = merged_pivoted['nuts_id'] + 1 + + # Define the columns for which the year-over-year change should be calculated + land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water'] + + # Sort the DataFrame by 'nuts_label' and 'year' to ensure correct chronological order + df_sorted = merged_pivoted.sort_values(by=['nuts_label', 'year']) + + # Group by 'nuts_label' and calculate the difference for the specified columns + # .diff() calculates the difference from the previous row within each group + # .fillna(0) replaces the initial NaN values with 0 + df_sorted[land_use_columns] = df_sorted.groupby('nuts_label')[land_use_columns].diff().fillna(0) + + # The 'df_sorted' DataFrame now contains the year-over-year change. + # You can display the first few rows of the result for a specific region to verify. 
+ print("Year-over-year changes for CZ03:") + print(df_sorted[df_sorted['nuts_label'] == 'CZ03'].head()) + + # multiply by 1000 because i think cgebox outputs in thousands of ha + for col in land_use_columns: + df_sorted[col] = df_sorted[col] * 1000 + # 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050 + # repeat these numbers + + # Write a new file in the task dir and reassign the project attribute to the new csv + hb.df_write(df_sorted, output_path) + - # Now add nuts_id - merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label]) - - # Write a new file in the task dir and reassign the project attribute to the new csv - hb.df_write(merged_pivoted, output_path) - - def build_bonn_task_tree(p): # Define the project AOI diff --git a/seals/seals_process_coarse_timeseries.py b/seals/seals_process_coarse_timeseries.py index 13ab182..03b0233 100644 --- a/seals/seals_process_coarse_timeseries.py +++ b/seals/seals_process_coarse_timeseries.py @@ -61,8 +61,8 @@ def regional_change(p): output_filename_end = '_' + str(year) + '_' + str(previous_year) + '_ha_diff_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_regional_coarsified.tif' regions_column_label = p.regions_column_label - if getattr(p, 'regional_projections_input_override_path', None): - regional_change_classes_path = p.regional_projections_input_override_path + if getattr(p, 'regional_projections_input_override_paths', None): + regional_change_classes_path = p.regional_projections_input_override_paths[p.scenario_label] # TODOO I should have iterated on class label here, then called the util file only on that one seals_utils.convert_regional_change_to_coarse(regional_change_vector_path, @@ -102,7 +102,7 @@ def regional_change(p): regional_coarsified_raster = hb.as_array(regional_coarsified_path) # covariate_additive - covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0 + covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path) @@ -152,8 +152,11 @@ def regional_change(p): covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift') input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1)) + + print('WARNING! sometimes need to use *1000 here to convert from ha to m2. Check if this is desired behavior. GTAP NEEDS THIS OTHERS DONT') def op(a, b, c): - return (a - (c-b)) * 1000.0 + return (a - (c-b)) + # return (a - (c-b)) * 1000 hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.) #### COVARIATE MULTIPLY SHIFT @@ -230,7 +233,7 @@ def op(a, b, c): covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift') input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1)) def op(a, b, c): - return (a * (b/c)) * 1000.0 + return (a * (b/c)) hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.) # This is the one i want to use so also save it as the template. Can choose from different algorithms above. 
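As a quick sanity check on the covariate_sum_shift hunks above, the op can be reproduced on toy arrays. A minimal sketch, with array roles following the raster_calculator input tuple (a = regional coarsified change, b = current coarse projection, c = target raster), purely illustrative values, and the removed * 1000.0 scaling left out, as in the new version:

    import numpy as np

    # a = regional coarsified change, b = current coarse projection, c = target raster
    a = np.array([10.0, -5.0, 0.0])
    b = np.array([100.0, 50.0, 20.0])
    c = np.array([105.0, 48.0, 20.0])

    # The regional change minus whatever shift already separates target from current.
    covariate_sum_shift = a - (c - b)
    print(covariate_sum_shift)  # -> [ 5. -3.  0.]
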
@@ -238,7 +241,7 @@ def op(a, b, c): hb.path_copy(alg_to_use_path, output_path_template) 5 # Given all of these, copy the one that we want to use to the name without a label - + # 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050 else: @@ -342,7 +345,7 @@ def regional_change_new_test(p): regional_coarsified_raster = hb.as_array(regional_coarsified_path) # covariate_additive - covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0 + covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path) @@ -393,7 +396,7 @@ def regional_change_new_test(p): covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift') input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1)) def op(a, b, c): - return (a - (c-b)) * 1000 + return (a - (c-b)) hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.) #### COVARIATE MULTIPLY SHIFT @@ -469,7 +472,7 @@ def op(a, b, c): covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift') input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1)) def op(a, b, c): - return (a * (b/c)) * 1000 + return (a * (b/c)) hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.) if p.region_to_coarse_algorithm == 'covariate_additive': diff --git a/seals/seals_utils.py b/seals/seals_utils.py index 44ca65b..1e8c1d4 100644 --- a/seals/seals_utils.py +++ b/seals/seals_utils.py @@ -1508,7 +1508,11 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan if region_ids_raster_path is None: region_ids_raster_path = os.path.join(output_dir, 'region_ids.tif') + + merged_vector_output_path = os.path.join(os.path.split(region_ids_raster_path)[0], 'regional_change_with_ids_' + scenario_label + '.gpkg') + + merged.to_file(merged_vector_output_path, driver='GPKG') ### Rasterize the regions vector to a raster. @@ -1519,7 +1523,7 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan # TODOO NOTE that here we are not using all_touched. This is a fundamental problem with coarse reclassification. Lots of the polygon will be missed. Ideally, you use all_touched=False for # country-country borders but all_touched=True for country-coastline boarders. Or join with EEZs? - hb.rasterize_to_match(regional_change_vector_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False) + hb.rasterize_to_match(merged_vector_output_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False) ### Get the number of cells per zone.
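
The series ends inside convert_regional_change_to_coarse, just before the per-zone cell counts are used. For reference, the allocation step that follows in that function (each zone's total change divided evenly over its coarse cells, then painted back through the zone-id raster) reduces to the minimal sketch below; the dictionaries and the 3x3 grid are toy stand-ins, and the final loop stands in for hb.reclassify_raster_hb with existing_values='zero':

    import numpy as np

    # Toy inputs: net change in ha per zone, coarse cell counts per zone, and a
    # zone-id grid where 0 means outside every region polygon.
    change_per_zone = {1: 1200.0, 2: -300.0}
    n_cells_per_zone = {1: 4, 2: 3}
    zone_ids = np.array([[1, 1, 2],
                         [1, 1, 2],
                         [0, 2, 0]])

    # Spread each zone's total evenly over its cells, as the allocation loop does.
    allocate_per_zone_dict = {}
    for zone, change in change_per_zone.items():
        n_cells = n_cells_per_zone.get(zone, 0)
        allocate_per_zone_dict[zone] = change / n_cells if n_cells > 0 else 0.0

    # Paint the per-cell values onto the grid; unmapped ids stay at zero.
    coarse = np.zeros(zone_ids.shape, dtype=float)
    for zone, value in allocate_per_zone_dict.items():
        coarse[zone_ids == zone] = value
    print(coarse)  # 300 ha per cell in zone 1, -100 ha per cell in zone 2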