243 changes: 243 additions & 0 deletions input/seals_cgebox_input/SSP1.csv

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions input/seals_cgebox_input/SSP2.csv

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions input/seals_cgebox_input/SSP3.csv

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions input/seals_cgebox_input/SSP4.csv

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions input/seals_cgebox_input/SSP5.csv

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions input/seals_cgebox_input/seals_cgebox_scenarios.csv
@@ -0,0 +1,7 @@
scenario_label,scenario_type,aoi,exogenous_label,climate_label,model_label,counterfactual_label,years,baseline_reference_label,base_years,key_base_year,comparison_counterfactual_labels,time_dim_adjustment,coarse_projections_input_path,lulc_src_label,lulc_simplification_label,lulc_correspondence_path,coarse_src_label,coarse_simplification_label,coarse_correspondence_path,lc_class_varname,dimensions,calibration_parameters_source,base_year_lulc_path,regional_projections_input_path,regions_vector_path,regions_column_label
baseline,baseline,from_regional_projections_input_path,baseline,,luh2-message,,2017,,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp1.csv,europe_nuts2_2016.gpkg,nuts_label
ssp2_rcp45,bau,from_regional_projections_input_path,ssp2,rcp45,luh2-message,bau,2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050,baseline,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp2.csv,europe_nuts2_2016.gpkg,nuts_label
ssp1_rcp26,bau,from_regional_projections_input_path,ssp1,rcp26,luh2-message,bau,2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050,baseline,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp1.csv,europe_nuts2_2016.gpkg,nuts_label
ssp3_rcp70,bau,from_regional_projections_input_path,ssp3,rcp70,luh2-message,bau,2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050,baseline,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp3.csv,europe_nuts2_2016.gpkg,nuts_label
ssp4_rcp60,bau,from_regional_projections_input_path,ssp4,rcp60,luh2-message,bau,2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050,baseline,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp4.csv,europe_nuts2_2016.gpkg,nuts_label
ssp5_rcp85,bau,from_regional_projections_input_path,ssp5,rcp85,luh2-message,bau,2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050,baseline,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/trained_coefficients_seals_manuscript_global_2025_3600sec.csv,lulc\esa\lulc_esa_2017.tif,ssp5.csv,europe_nuts2_2016.gpkg,nuts_label
3 changes: 3 additions & 0 deletions input/seals_cgebox_input/seals_cgebox_scenarios_test.csv
@@ -0,0 +1,3 @@
scenario_label,scenario_type,aoi,exogenous_label,climate_label,model_label,counterfactual_label,years,baseline_reference_label,base_years,key_base_year,comparison_counterfactual_labels,time_dim_adjustment,coarse_projections_input_path,lulc_src_label,lulc_simplification_label,lulc_correspondence_path,coarse_src_label,coarse_simplification_label,coarse_correspondence_path,lc_class_varname,dimensions,calibration_parameters_source,base_year_lulc_path,regional_projections_input_path,regions_vector_path,regions_column_label
baseline_luh2-message,baseline,from_regional_projections_input_path,baseline,,luh2-message,,2017,,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/default_global_coefficients.csv,lulc\esa\lulc_esa_2017.tif,ssp1.csv,europe_nuts2_2016.gpkg,nuts_label
ssp2_rcp45_luh2-message_bau,bau,from_regional_projections_input_path,ssp2,rcp45,luh2-message,bau,2050,baseline_luh2-message,2017,2017,,add2015,luh2/raw_data/rcp45_ssp2/multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-MESSAGE-ssp245-2-1-f_gn_2015-2100.nc,esa,seals7,seals/default_inputs/esa_seals7_correspondence.csv,luh2-14,seals7,seals/default_inputs/luh2-14_seals7_correspondence.csv,all_variables,time,seals/default_inputs/default_global_coefficients.csv,lulc\esa\lulc_esa_2017.tif,ssp1.csv,europe_nuts2_2016.gpkg,nuts_label
228 changes: 228 additions & 0 deletions seals/run_seals_cgebox.py
@@ -0,0 +1,228 @@
import os, sys, time

import hazelbean as hb
import pandas as pd

from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_utils, seals_visualization_tasks

### ENVIRONMENT VARIABLES (nothing else should need to be edited besides this section)
project_name = 'seals_cgebox_devstack'  # Name of the project. Also used to set the project dir.

project_dir = os.path.join('../projects', project_name) # DEVSTACK OPTION. If you're running a standalone repo clone, you probably want to set this to just os.path.join('..')
input_dir = os.path.join(project_dir, 'input')
input_data_dir = 'input/seals_cgebox_input' # Will look in the repo or base data for this and then copy it to the input_dir above.
scenario_definitions_path = os.path.join(input_dir, 'seals_cgebox_scenarios.csv') # Path to the scenario definitions file.


def convert_cgebox_output_to_seals_regional_projections_input(p):
    """Convert CGEBox output to SEALS regional projections input format.

    This function reads CGEBox output files, processes them to calculate year-over-year
    changes in land-use categories, and saves the transformed data in a format compatible
    with SEALS.

    This is the only task that differs from the standard SEALS workflow, and it is the only
    task that differs in the build_bonn_task_tree function.

    The hard part is that because the input regional_change CSVs are modified, we also need
    to write a replacements dictionary, p.regional_projections_input_override_paths, that
    maps scenario labels to the new CSVs. SEALS checks for an override dictionary here to
    replace what is defined in the scenarios.csv."""
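    # Illustrative shape of the override dictionary (scenario labels come from
    # scenarios.csv; the path shown is hypothetical):
    #   p.regional_projections_input_override_paths = {
    #       'ssp2_rcp45': '<cur_dir>/regional_projections_input_pivoted_ssp2.csv',
    #   }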


    if p.run_this:

        # Define a dictionary to hold override paths for each scenario
        p.regional_projections_input_override_paths = {}

        for index, row in p.scenarios_df.iterrows():
            seals_utils.assign_df_row_to_object_attributes(p, row)

            if p.scenario_type != 'baseline':
                input_path = p.regional_projections_input_path
                output_path = os.path.join(p.cur_dir, f'regional_projections_input_pivoted_{p.exogenous_label}.csv')

                # TODO: the override does not currently work because it does not iterate over years.
                p.regional_projections_input_override_paths[p.scenario_label] = output_path

                if not hb.path_exists(output_path):
                    df = hb.df_read(input_path)

                    # Step 1: Melt the DataFrame to convert year columns into rows.
                    # Get the list of columns to unpivot (years).
                    years_to_unpivot = [col for col in df.columns if col.isdigit()]
                    melted = df.melt(
                        id_vars=[p.regions_column_label, 'LandCover'],  # Assumes the land cover column is named 'LandCover'
                        value_vars=years_to_unpivot,
                        var_name='year',
                        value_name='value'
                    )
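                    # Illustrative long format after the melt (hypothetical values):
                    #   nuts_label  LandCover  year  value
                    #   CZ03        cropland   2020  123.4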

                    # Step 2: Pivot the melted DataFrame.
                    # We set the region and year as the new index, and create new columns from the 'LandCover' categories.
                    merged_pivoted = melted.pivot_table(
                        index=[p.regions_column_label, 'year'],
                        columns='LandCover',
                        values='value'
                    ).reset_index()
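                    # Illustrative wide format after the pivot (hypothetical values):
                    #   nuts_label  year  cropland  forest  ...
                    #   CZ03        2020  123.4     567.8   ...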

                    # Now add nuts_id as a 1-based integer code for each region.
                    merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label])
                    merged_pivoted['nuts_id'] = merged_pivoted['nuts_id'] + 1

                    # Define the columns for which the year-over-year change should be calculated
                    land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water']

                    # Sort the DataFrame by region and 'year' to ensure correct chronological order
                    # (use p.regions_column_label for consistency with the melt above; in this project it is 'nuts_label').
                    df_sorted = merged_pivoted.sort_values(by=[p.regions_column_label, 'year'])

                    # Group by region and calculate the difference for the specified columns.
                    # .diff() calculates the difference from the previous row within each group;
                    # .fillna(0) replaces the initial NaN values with 0.
                    df_sorted[land_use_columns] = df_sorted.groupby(p.regions_column_label)[land_use_columns].diff().fillna(0)
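                    # For a single region this turns levels into changes, e.g. cropland
                    # values [10.0, 12.0, 15.0] across three years become [0.0, 2.0, 3.0].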

                    # The 'df_sorted' DataFrame now contains the year-over-year change.
                    # Display the first few rows for a specific region (a NUTS2 code) to verify.
                    print("Year-over-year changes for CZ03:")
                    print(df_sorted[df_sorted[p.regions_column_label] == 'CZ03'].head())

                    # Multiply by 1000 because CGEBox outputs areas in thousands of hectares.
                    for col in land_use_columns:
                        df_sorted[col] = df_sorted[col] * 1000

                    # Write a new file in the task dir; the override dict above points the scenario at this CSV.
                    hb.df_write(df_sorted, output_path)
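
# Optional sanity check on a pivoted CSV (a minimal sketch, not an existing SEALS or
# hazelbean helper; the function name and checks are illustrative assumptions). It verifies
# that the expected change columns are present and that each region's earliest year is all
# zeros, which the .diff().fillna(0) step above guarantees.
def _check_pivoted_changes(output_path, regions_column_label):
    land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water']
    df = pd.read_csv(output_path)
    missing = [c for c in land_use_columns if c not in df.columns]
    if missing:
        raise ValueError(f'{output_path} is missing expected change columns: {missing}')
    # The first (earliest) year of each region should be all zeros after diff().fillna(0).
    first_rows = df.sort_values('year').groupby(regions_column_label).head(1)
    if not (first_rows[land_use_columns] == 0).all().all():
        raise ValueError(f'First-year changes in {output_path} are not all zero.')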


def build_bonn_task_tree(p):

    # Define the project AOI
    p.project_aoi_task = p.add_task(seals_tasks.project_aoi)
    p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input)

    ##### FINE PROCESSED INPUTS #####
    p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs)
    p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False)
    p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False)
    p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False)
    p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False)
    p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False)

    ##### COARSE CHANGE #####
    p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0)
    p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0)
    p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0)
    p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0)
    p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0)

    ##### REGIONAL CHANGE #####
    p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change)

    ##### ALLOCATION #####
    p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0)
    p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0)
    p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0)

    ##### STITCH ZONES #####
    p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios)

    ##### VISUALIZE EXISTING DATA #####
    p.visualization_task = p.add_task(seals_visualization_tasks.visualization)
    p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task)


main = ''
if __name__ == '__main__':

    # Create a ProjectFlow object to organize directories and enable parallel processing.
    p = hb.ProjectFlow()

    # Default locations (used if local vars are not defined above)
    p.user_dir = os.path.expanduser('~')
    p.extra_dirs = ['Files', 'seals', 'projects']

    # Set processing resolution: determines how large of a chunk is processed at a time. 4 degrees is about the max for 64 GB memory systems.
    p.processing_resolution = 1.0  # In degrees. Must be in pyramid_compatible_resolutions.

    hb.log(f'Running script {__file__} with abs {os.path.abspath(__file__)}.')

    if 'project_name' in globals():
        hb.log(f'Using locally set project_name: {project_name}')
        p.project_name = project_name
        generate_new_project_dir_with_timestamp_for_every_run = False  # If True, every run goes into a new, unique folder. This can help with debugging, but it also means each run will be very slow because it will not use precalculated results.
        if generate_new_project_dir_with_timestamp_for_every_run:
            p.project_name = p.project_name + '_' + hb.pretty_time()

    if 'project_dir' in globals():
        hb.log(f'Using locally set project_dir: {project_dir} with abspath {os.path.abspath(project_dir)}')
        p.project_dir = project_dir
    else:
        p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name)
    p.set_project_dir(p.project_dir)

    if 'input_dir' in globals():
        hb.log(f'Using locally set input_dir: {input_dir} with abspath {os.path.abspath(input_dir)}, but first we need to check that it is different from the one implied by the project_dir.')
        if p.input_dir != input_dir:
            hb.log(f'Overriding project input_dir {p.input_dir} with locally set input_dir: {input_dir}')
            p.input_dir = input_dir
        else:
            pass  # Just keep the one implied by the project_dir.

    if 'input_data_dir' in globals():
        hb.log(f'Detected locally set input_data_dir: {input_data_dir} with abspath {os.path.abspath(input_data_dir)}. This happens when the data used for project setup (not the raw spatial data) is obtained by git cloning. The assumed behavior here is to copy it from the repo input dir to the project input dir.')
        p.input_data_dir = input_data_dir
        # Copy the input data dir to the project input dir if it exists and the project input dir doesn't exist.
        # When we copy to the local dir, we drop the input_projectname part:
        _, target_dir = os.path.split(p.input_data_dir)
        if hb.path_exists(p.input_data_dir, verbose=True):
            hb.copy_file_tree_to_new_root(p.input_data_dir, p.input_dir, skip_existing=True)
            hb.log(f'Copied input data from {p.input_data_dir} and target dir {target_dir}, abspath: {os.path.abspath(p.input_data_dir)} to {p.input_dir}, abspath: {os.path.abspath(p.input_dir)}.')

    # Check for locally-set versions of base_data_dir, project_dir, and input_dir.

    # Set the base data dir. The model will check here to see if it has everything it needs to run.
    # If anything is missing, it will download it. You can use the same base_data dir across multiple projects.
    # Additionally, if you're clever, you can move files generated in your tasks to the right base_data_dir
    # directory so that they are available for future projects and avoid redundant processing.
    # The final directory has to be named base_data to match the naming convention on the Google Cloud bucket.
    # if 'base_data_dir' in globals():
    #     hb.log(f'Using locally set base_data_dir: {base_data_dir}')
    #     p.base_data_dir = base_data_dir
    # else:
    #     p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data')
    # # Actually set the base data dir, which will also validate that this folder is correct and not a duplicate.
    p.set_base_data_dir()

    ## Set defaults and generate the scenario_definitions.csv if it doesn't exist.
    # SEALS will run based on the scenarios defined in a scenario_definitions.csv.
    # If you have not run SEALS before, SEALS will generate it in your project's input_dir.
    # A useful way to get started is to run SEALS on the test data without modification
    # and then edit the scenario_definitions.csv to your project's needs.
    if 'scenario_definitions_path' in globals():
        hb.log(f'Using locally set scenarios_file_path: {scenario_definitions_path} with abspath {os.path.abspath(scenario_definitions_path)}')
        p.scenario_definitions_path = scenario_definitions_path
    else:
        p.scenario_definitions_path = os.path.join(p.input_dir, 'scenarios.csv')
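    # Either way, the file uses the same columns as input/seals_cgebox_input/seals_cgebox_scenarios.csv
    # (scenario_label, scenario_type, aoi, ..., regions_column_label; see the header row in that file).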

    p.run_in_parallel = 1  # Must be set before building the task tree if the task tree has parallel iterator tasks.

    # Build the task tree via a building function and assign it to p. IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION.
    build_bonn_task_tree(p)

    # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a
    # different bucket than the default, provide the name and credentials here. Otherwise it uses the default public data bucket 'gtap_invest_seals_2023_04_21'.
    p.data_credentials_path = None
    p.input_bucket_name = None

    seals_initialize_project.initialize_scenario_definitions(p)

    seals_initialize_project.set_advanced_options(p)

    p.L = hb.get_logger('test_run_seals')
    hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir)

    p.execute()

    result = 'Done!'
