184 changes: 184 additions & 0 deletions seals/run_seals_bonn.py
@@ -0,0 +1,184 @@
import os
import sys

import hazelbean as hb
import pandas as pd

from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_utils, seals_visualization_tasks

# TODO
# 1. Make regional projections represent the change, not the total.
# 2. Decide how to handle project_dir in the project repo so it uses the git-cloned inputs.
# 3. Test setup.bat and decide whether it makes sense to keep it in src (though it is gitignored).


def convert_cgebox_output_to_seals_regional_projections_input(p):
    """Pivot CGEBox regional output into the wide-by-class, year-over-year-change CSV that SEALS expects as regional projections input, and register it as an override path per scenario."""

p.regional_projections_input_override_paths = {}
if p.run_this:
for index, row in p.scenarios_df.iterrows():
seals_utils.assign_df_row_to_object_attributes(p, row)

if p.scenario_type != 'baseline':
input_path = p.regional_projections_input_path
output_path = os.path.join(p.cur_dir, f'regional_projections_input_pivoted_{p.exogenous_label}.csv')

# START HERE, the override doesn't work cause it doesn't iterate over years.... use a catear?
p.regional_projections_input_override_paths[p.scenario_label] = output_path

if not hb.path_exists(output_path):
df = hb.df_read(input_path)

# Step 1: Melt the DataFrame to convert year columns into rows.

# Get the list of columns to unpivot (years)
years_to_unpivot = [col for col in df.columns if col.isdigit()]
# years_to_unpivot = []
melted = df.melt(
id_vars=[p.regions_column_label, 'LandCover'], # Assumes the land cover column is named 'LandCover'
value_vars=years_to_unpivot,
var_name='year',
value_name='value'
)

# Step 2: Pivot the melted DataFrame.
# We set the region and year as the new index, and create new columns from 'LandCover' categories.
merged_pivoted = melted.pivot_table(
index=[p.regions_column_label, 'year'],
columns='LandCover',
values='value'
).reset_index()


# Now add nuts_id
merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label])
merged_pivoted['nuts_id'] = merged_pivoted['nuts_id'] + 1

# Define the columns for which the year-over-year change should be calculated
land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water']

# Sort the DataFrame by 'nuts_label' and 'year' to ensure correct chronological order
df_sorted = merged_pivoted.sort_values(by=['nuts_label', 'year'])

# Group by 'nuts_label' and calculate the difference for the specified columns
# .diff() calculates the difference from the previous row within each group
# .fillna(0) replaces the initial NaN values with 0
df_sorted[land_use_columns] = df_sorted.groupby('nuts_label')[land_use_columns].diff().fillna(0)

# The 'df_sorted' DataFrame now contains the year-over-year change.
# You can display the first few rows of the result for a specific region to verify.
print("Year-over-year changes for CZ03:")
print(df_sorted[df_sorted['nuts_label'] == 'CZ03'].head())

# multiply by 1000 because i think cgebox outputs in thousands of ha
for col in land_use_columns:
df_sorted[col] = df_sorted[col] * 1000
# 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050
# repeat these numbers

# Write a new file in the task dir and reassign the project attribute to the new csv
hb.df_write(df_sorted, output_path)


def build_bonn_task_tree(p):

# Define the project AOI
p.project_aoi_task = p.add_task(seals_tasks.project_aoi)
p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input)


##### FINE PROCESSED INPUTS #####
p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs)
p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False)

##### COARSE CHANGE #####
p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0)
p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0)
p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0)
p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0)
p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0)

##### REGIONAL
p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change)

##### ALLOCATION #####
p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0)
p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0)
p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0)

##### STITCH ZONES #####
p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios)

    ##### VISUALIZE EXISTING DATA #####
p.visualization_task = p.add_task(seals_visualization_tasks.visualization)
p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task)




main = ''
if __name__ == '__main__':

### ------- ENVIRONMENT SETTINGS -------------------------------
# Users should only need to edit lines in this section

# Create a ProjectFlow Object to organize directories and enable parallel processing.
p = hb.ProjectFlow()

# Assign project-level attributes to the p object (such as in p.base_data_dir = ... below)
# including where the project_dir and base_data are located.
# The project_name is used to name the project directory below. If the directory exists, each task will not recreate
# files that already exist.
p.user_dir = os.path.expanduser('~')
p.extra_dirs = ['Files', 'seals', 'projects']
p.project_name = 'bonn_esm_cgebox_seals'
    # p.project_name = p.project_name + '_' + hb.pretty_time() # If you don't want to recreate everything each time, comment out this line.

# Based on the paths above, set the project_dir. All files will be created in this directory.
p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name)
p.set_project_dir(p.project_dir)

    p.run_in_parallel = 1 # Must be set before building the task tree if the task tree has parallel iterator tasks.

# Build the task tree via a building function and assign it to p. IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION
build_bonn_task_tree(p)

# Set the base data dir. The model will check here to see if it has everything it needs to run.
# If anything is missing, it will download it. You can use the same base_data dir across multiple projects.
    # Additionally, if you're clever, you can move files generated in your tasks to the right location in the
    # base_data_dir so that they are available for future projects and avoid redundant processing.
    # The final directory has to be named base_data to match the naming convention on the Google Cloud bucket.
p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data')

    # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a
    # bucket other than the default, provide the name and credentials here. Otherwise it uses the default public data bucket 'gtap_invest_seals_2023_04_21'.
p.data_credentials_path = None
p.input_bucket_name = None

## Set defaults and generate the scenario_definitions.csv if it doesn't exist.
# SEALS will run based on the scenarios defined in a scenario_definitions.csv
# If you have not run SEALS before, SEALS will generate it in your project's input_dir.
    # A useful way to get started is to run SEALS on the test data without modification
    # and then edit the scenario_definitions.csv to fit your project's needs.
p.scenario_definitions_filename = 'standard_scenarios.csv'
p.scenario_definitions_path = os.path.join(p.input_dir, p.scenario_definitions_filename)
seals_initialize_project.initialize_scenario_definitions(p)

    # Set processing resolution: determines how large a chunk is processed at a time. 4 degrees is about the maximum for 64 GB memory systems.
p.processing_resolution = 1.0 # In degrees. Must be in pyramid_compatible_resolutions

seals_initialize_project.set_advanced_options(p)

p.L = hb.get_logger('test_run_seals')
hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir)

p.execute()

result = 'Done!'
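
For reference, a minimal standalone sketch of the reshape that convert_cgebox_output_to_seals_regional_projections_input performs, on toy data: year columns are melted into rows, land-cover classes are pivoted back out as columns, a 1-based integer region id is assigned, and totals are converted to year-over-year changes and rescaled from thousands of ha to ha. The column names ('nuts_label', 'LandCover') and all values are hypothetical; this illustrates the pattern, not the project's exact I/O.

```python
import pandas as pd

# Toy CGEBox-style wide table: one row per (region, land-cover class),
# one column per projection year, values in thousands of hectares (hypothetical).
df = pd.DataFrame({
    'nuts_label': ['CZ03', 'CZ03', 'CZ04', 'CZ04'],
    'LandCover':  ['cropland', 'forest', 'cropland', 'forest'],
    '2020':       [100.0, 50.0, 80.0, 60.0],
    '2025':       [ 98.0, 52.0, 79.0, 61.0],
    '2030':       [ 95.0, 55.0, 77.0, 63.0],
})

# Step 1: melt the year columns into rows (wide -> long).
years = [c for c in df.columns if c.isdigit()]
melted = df.melt(id_vars=['nuts_label', 'LandCover'], value_vars=years,
                 var_name='year', value_name='value')

# Step 2: pivot land-cover classes back out as columns, indexed by (region, year).
pivoted = melted.pivot_table(index=['nuts_label', 'year'], columns='LandCover',
                             values='value').reset_index()

# Step 3: assign a 1-based integer region id, mirroring the pd.factorize call above.
pivoted['nuts_id'] = pd.factorize(pivoted['nuts_label'])[0] + 1

# Step 4: totals -> year-over-year changes within each region, then thousands of ha -> ha.
lu_cols = ['cropland', 'forest']
pivoted = pivoted.sort_values(['nuts_label', 'year'])
pivoted[lu_cols] = pivoted.groupby('nuts_label')[lu_cols].diff().fillna(0) * 1000

print(pivoted)
```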

21 changes: 14 additions & 7 deletions seals/seals_process_coarse_timeseries.py
@@ -60,6 +60,10 @@ def regional_change(p):
scenario_label = p.scenario_label
output_filename_end = '_' + str(year) + '_' + str(previous_year) + '_ha_diff_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_regional_coarsified.tif'
regions_column_label = p.regions_column_label

if getattr(p, 'regional_projections_input_override_paths', None):
regional_change_classes_path = p.regional_projections_input_override_paths[p.scenario_label]

# TODOO I should have iterated on class label here, then called the util file only on that one
seals_utils.convert_regional_change_to_coarse(regional_change_vector_path,
regional_change_classes_path,
@@ -98,7 +102,7 @@ def regional_change(p):
regional_coarsified_raster = hb.as_array(regional_coarsified_path)

# covariate_additive
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)


@@ -148,8 +152,11 @@ def regional_change(p):

covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))

print('WARNING! sometimes need to use *1000 here to convert from ha to m2. Check if this is desired behavior. GTAP NEEDS THIS OTHERS DONT')
def op(a, b, c):
return (a - (c-b)) * 1000.0
return (a - (c-b))
# return (a - (c-b)) * 1000
hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)

#### COVARIATE MULTIPLY SHIFT
@@ -226,15 +233,15 @@ def op(a, b, c):
covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
def op(a, b, c):
return (a * (b/c)) * 1000.0
return (a * (b/c))
hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)

# This is the one i want to use so also save it as the template. Can choose from different algorithms above.
alg_to_use_path = hb.suri(output_path_template, 'covariate_sum_shift')
hb.path_copy(alg_to_use_path, output_path_template)
# Given all of these, copy the one that we want to use to the name without a label

# 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050


else:
@@ -338,7 +345,7 @@ def regional_change_new_test(p):
regional_coarsified_raster = hb.as_array(regional_coarsified_path)

# covariate_additive
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)


@@ -389,7 +396,7 @@ def regional_change_new_test(p):
covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))
def op(a, b, c):
return (a - (c-b)) * 1000
return (a - (c-b))
hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)

#### COVARIATE MULTIPLY SHIFT
@@ -465,7 +472,7 @@ def op(a, b, c):
covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
def op(a, b, c):
return (a * (b/c)) * 1000
return (a * (b/c))
hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)

if p.region_to_coarse_algorithm == 'covariate_additive':
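To make the reconciliation arithmetic in this file easier to audit, here is a hedged numpy sketch of the covariate_additive and covariate_sum_shift formulas as they read in the diff; the array values are hypothetical, and the optional ×1000 factor reflects the unit question flagged in the WARNING print rather than a confirmed rule.

```python
import numpy as np

# Hypothetical 2x2 coarse grids of land-use change, in hectares per cell.
regional_change = np.array([[10.0, -5.0], [0.0, 2.0]])  # coarsified regional projection (op arg a)
coarse_current  = np.array([[ 4.0,  1.0], [1.0, 0.0]])  # current coarse LUC projection (op arg b)
coarse_target   = np.array([[ 6.0, -2.0], [2.0, 1.0]])  # target coarse LUC projection (op arg c)

# covariate_additive: the regional change added directly onto the coarse projection.
covariate_additive = coarse_current + regional_change

# covariate_sum_shift: a - (c - b), i.e. the regional change shifted by how much
# the coarse projection itself moves between current and target.
covariate_sum_shift = regional_change - (coarse_target - coarse_current)

# The diff drops the trailing *1000.0 from these ops; per the WARNING above,
# some inputs (e.g. GTAP) still need that rescaling, so it would be applied
# conditionally depending on the source model's units (assumption).
needs_rescale = False
if needs_rescale:
    covariate_sum_shift = covariate_sum_shift * 1000.0

print(covariate_additive)
print(covariate_sum_shift)
```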
21 changes: 16 additions & 5 deletions seals/seals_tasks.py
@@ -35,11 +35,22 @@ def project_aoi(p):
if not p.regions_column_label in gdf.columns:
raise ValueError(f"The column '{p.regions_column_label}' is not found in the regions vector file: {p.regions_vector_path}. Please check the column name or the vector file.")

p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi

filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code.
filter_value = p.aoi

if p.aoi == 'from_regional_projections_input_path':
# Read the csv to get the unique AOI values
df = hb.df_read(p.regional_projections_input_path)
unique_aois = list(df[p.regions_column_label].unique())
filter_column = p.regions_column_label
filter_value = unique_aois
p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi
else:

p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi

filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code.
filter_value = p.aoi


for current_aoi_path in hb.list_filtered_paths_nonrecursively(p.cur_dir, include_strings='aoi'):
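A minimal sketch of the AOI-selection branch added to project_aoi, assuming a pandas/geopandas workflow; the file paths and the 'nuts_label' column name are illustrative, and the actual clipping in SEALS happens downstream via hazelbean rather than with the isin() filter shown here.

```python
import pandas as pd
import geopandas as gpd

regions_column_label = 'nuts_label'           # assumed regions column
gdf = gpd.read_file('regions.gpkg')           # hypothetical regions vector
aoi = 'from_regional_projections_input_path'  # or a single code like 'CZE'

if aoi == 'from_regional_projections_input_path':
    # Take every region that appears in the regional projections CSV.
    df = pd.read_csv('regional_projections_input.csv')  # hypothetical path
    filter_value = list(df[regions_column_label].unique())
    selected = gdf[gdf[regions_column_label].isin(filter_value)]
else:
    # Filter on a single value, e.g. an ISO3 code.
    filter_value = aoi
    selected = gdf[gdf[regions_column_label] == filter_value]

selected.to_file('aoi_' + str(aoi) + '.gpkg', driver='GPKG')
```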
6 changes: 5 additions & 1 deletion seals/seals_utils.py
@@ -1508,7 +1508,11 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan

if region_ids_raster_path is None:
region_ids_raster_path = os.path.join(output_dir, 'region_ids.tif')

merged_vector_output_path = os.path.join(os.path.split(region_ids_raster_path)[0], 'regional_change_with_ids_' + scenario_label + '.gpkg')


merged.to_file(merged_vector_output_path, driver='GPKG')

### Rasterize the regions vector to a raster.

@@ -1519,7 +1523,7 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan

    # TODOO NOTE that here we are not using all_touched. This is a fundamental problem with coarse reclassification: much of each polygon can be missed. Ideally, you would use all_touched=False for
    # country-country borders but all_touched=True for country-coastline borders. Or join with EEZs?
hb.rasterize_to_match(regional_change_vector_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)
hb.rasterize_to_match(merged_vector_output_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)

### Get the number of cells per zone.

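The TODOO note above concerns the all_touched choice when burning region polygons onto a coarse grid. Here is a small rasterio sketch of the tradeoff, with a hypothetical grid and polygon: all_touched=False only burns cells whose centers fall inside the polygon (so partially covered coastal cells are dropped), while all_touched=True burns every intersected cell (so cells along shared land borders can be claimed by both neighbors).

```python
import rasterio.features
from rasterio.transform import from_origin
from shapely.geometry import box

# Hypothetical 4x4 coarse grid with 1-degree cells, origin at (0, 4).
transform = from_origin(0.0, 4.0, 1.0, 1.0)
out_shape = (4, 4)

# A region polygon that only partially covers the cells along its edge
# (a stand-in for a coastline).
region = box(0.25, 0.25, 2.5, 3.5)
shapes = [(region, 1)]

strict = rasterio.features.rasterize(shapes, out_shape=out_shape, transform=transform,
                                     fill=0, all_touched=False, dtype='uint8')
greedy = rasterio.features.rasterize(shapes, out_shape=out_shape, transform=transform,
                                     fill=0, all_touched=True, dtype='uint8')

# strict undercounts the partially covered edge cells; greedy claims every
# touched cell, which double-counts along shared borders.
print(strict.sum(), greedy.sum())
```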