184 changes: 184 additions & 0 deletions seals/run_seals_bonn.py
@@ -0,0 +1,184 @@
import os
import sys

import hazelbean as hb
import pandas as pd

from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_utils, seals_visualization_tasks

# TODO
# 1. Make regional projections represent the change, not the total.
# 2. Decide how to handle project_dir in the project repo so it uses the git-cloned inputs.
# 3. Test setup.bat and decide whether it makes sense to keep it in src (though it is gitignored).


def convert_cgebox_output_to_seals_regional_projections_input(p):
    """Pivot CGEBox regional output into the wide-by-class, year-over-year-change CSV that SEALS expects as regional projections input, and register it as an override path per scenario."""

p.regional_projections_input_override_paths = {}
if p.run_this:
for index, row in p.scenarios_df.iterrows():
seals_utils.assign_df_row_to_object_attributes(p, row)

if p.scenario_type != 'baseline':
input_path = p.regional_projections_input_path
output_path = os.path.join(p.cur_dir, f'regional_projections_input_pivoted_{p.exogenous_label}.csv')

# START HERE, the override doesn't work cause it doesn't iterate over years.... use a catear?
p.regional_projections_input_override_paths[p.scenario_label] = output_path

if not hb.path_exists(output_path):
df = hb.df_read(input_path)

# Step 1: Melt the DataFrame to convert year columns into rows.

# Get the list of columns to unpivot (years)
years_to_unpivot = [col for col in df.columns if col.isdigit()]
# years_to_unpivot = []
melted = df.melt(
id_vars=[p.regions_column_label, 'LandCover'], # Assumes the land cover column is named 'LandCover'
value_vars=years_to_unpivot,
var_name='year',
value_name='value'
)

# Step 2: Pivot the melted DataFrame.
# We set the region and year as the new index, and create new columns from 'LandCover' categories.
merged_pivoted = melted.pivot_table(
index=[p.regions_column_label, 'year'],
columns='LandCover',
values='value'
).reset_index()


# Now add nuts_id
merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label])
merged_pivoted['nuts_id'] = merged_pivoted['nuts_id'] + 1

# Define the columns for which the year-over-year change should be calculated
land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water']

# Sort the DataFrame by 'nuts_label' and 'year' to ensure correct chronological order
df_sorted = merged_pivoted.sort_values(by=['nuts_label', 'year'])

# Group by 'nuts_label' and calculate the difference for the specified columns
# .diff() calculates the difference from the previous row within each group
# .fillna(0) replaces the initial NaN values with 0
df_sorted[land_use_columns] = df_sorted.groupby('nuts_label')[land_use_columns].diff().fillna(0)

# The 'df_sorted' DataFrame now contains the year-over-year change.
# You can display the first few rows of the result for a specific region to verify.
print("Year-over-year changes for CZ03:")
print(df_sorted[df_sorted['nuts_label'] == 'CZ03'].head())

# multiply by 1000 because i think cgebox outputs in thousands of ha
for col in land_use_columns:
df_sorted[col] = df_sorted[col] * 1000
# 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050
# repeat these numbers

# Write a new file in the task dir and reassign the project attribute to the new csv
hb.df_write(df_sorted, output_path)


def build_bonn_task_tree(p):

# Define the project AOI
p.project_aoi_task = p.add_task(seals_tasks.project_aoi)
p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input)


##### FINE PROCESSED INPUTS #####
p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs)
p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False)
p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False)

##### COARSE CHANGE #####
p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0)
p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0)
p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0)
p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0)
p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0)

##### REGIONAL
p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change)

##### ALLOCATION #####
p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0)
p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0)
p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0)

##### STITCH ZONES #####
p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios)

    ##### VISUALIZE EXISTING DATA #####
p.visualization_task = p.add_task(seals_visualization_tasks.visualization)
p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task)




main = ''
if __name__ == '__main__':

### ------- ENVIRONMENT SETTINGS -------------------------------
# Users should only need to edit lines in this section

# Create a ProjectFlow Object to organize directories and enable parallel processing.
p = hb.ProjectFlow()

# Assign project-level attributes to the p object (such as in p.base_data_dir = ... below)
# including where the project_dir and base_data are located.
# The project_name is used to name the project directory below. If the directory exists, each task will not recreate
# files that already exist.
p.user_dir = os.path.expanduser('~')
p.extra_dirs = ['Files', 'seals', 'projects']
p.project_name = 'bonn_esm_cgebox_seals'
    # p.project_name = p.project_name + '_' + hb.pretty_time() # If you don't want to recreate everything each time, comment out this line.

# Based on the paths above, set the project_dir. All files will be created in this directory.
p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name)
p.set_project_dir(p.project_dir)

    p.run_in_parallel = 1 # Must be set before building the task tree if the task tree has parallel iterator tasks.

# Build the task tree via a building function and assign it to p. IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION
build_bonn_task_tree(p)

# Set the base data dir. The model will check here to see if it has everything it needs to run.
# If anything is missing, it will download it. You can use the same base_data dir across multiple projects.
    # Additionally, if you're clever, you can move files generated in your tasks to the right location in the
    # base_data_dir so that they are available for future projects and avoid redundant processing.
    # The final directory has to be named base_data to match the naming convention on the Google Cloud bucket.
p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data')

    # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a
    # bucket other than the default, provide the name and credentials here. Otherwise it uses the default public data bucket 'gtap_invest_seals_2023_04_21'.
p.data_credentials_path = None
p.input_bucket_name = None

## Set defaults and generate the scenario_definitions.csv if it doesn't exist.
# SEALS will run based on the scenarios defined in a scenario_definitions.csv
# If you have not run SEALS before, SEALS will generate it in your project's input_dir.
    # A useful way to get started is to run SEALS on the test data without modification
    # and then edit the scenario_definitions.csv to fit your project's needs.
p.scenario_definitions_filename = 'standard_scenarios.csv'
p.scenario_definitions_path = os.path.join(p.input_dir, p.scenario_definitions_filename)
seals_initialize_project.initialize_scenario_definitions(p)

    # Set processing resolution: determines how large a chunk is processed at a time. 4 degrees is about the maximum for 64 GB memory systems.
p.processing_resolution = 1.0 # In degrees. Must be in pyramid_compatible_resolutions

seals_initialize_project.set_advanced_options(p)

p.L = hb.get_logger('test_run_seals')
hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir)

p.execute()

result = 'Done!'
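
For reference, a minimal standalone sketch of the reshape that convert_cgebox_output_to_seals_regional_projections_input performs, on toy data: year columns are melted into rows, land-cover classes are pivoted back out as columns, a 1-based integer region id is assigned, and totals are converted to year-over-year changes and rescaled from thousands of ha to ha. The column names ('nuts_label', 'LandCover') and all values are hypothetical; this illustrates the pattern, not the project's exact I/O.

```python
import pandas as pd

# Toy CGEBox-style wide table: one row per (region, land-cover class),
# one column per projection year, values in thousands of hectares (hypothetical).
df = pd.DataFrame({
    'nuts_label': ['CZ03', 'CZ03', 'CZ04', 'CZ04'],
    'LandCover':  ['cropland', 'forest', 'cropland', 'forest'],
    '2020':       [100.0, 50.0, 80.0, 60.0],
    '2025':       [ 98.0, 52.0, 79.0, 61.0],
    '2030':       [ 95.0, 55.0, 77.0, 63.0],
})

# Step 1: melt the year columns into rows (wide -> long).
years = [c for c in df.columns if c.isdigit()]
melted = df.melt(id_vars=['nuts_label', 'LandCover'], value_vars=years,
                 var_name='year', value_name='value')

# Step 2: pivot land-cover classes back out as columns, indexed by (region, year).
pivoted = melted.pivot_table(index=['nuts_label', 'year'], columns='LandCover',
                             values='value').reset_index()

# Step 3: assign a 1-based integer region id, mirroring the pd.factorize call above.
pivoted['nuts_id'] = pd.factorize(pivoted['nuts_label'])[0] + 1

# Step 4: totals -> year-over-year changes within each region, then thousands of ha -> ha.
lu_cols = ['cropland', 'forest']
pivoted = pivoted.sort_values(['nuts_label', 'year'])
pivoted[lu_cols] = pivoted.groupby('nuts_label')[lu_cols].diff().fillna(0) * 1000

print(pivoted)
```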

21 changes: 14 additions & 7 deletions seals/seals_process_coarse_timeseries.py
@@ -60,6 +60,10 @@ def regional_change(p):
scenario_label = p.scenario_label
output_filename_end = '_' + str(year) + '_' + str(previous_year) + '_ha_diff_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_regional_coarsified.tif'
regions_column_label = p.regions_column_label

if getattr(p, 'regional_projections_input_override_paths', None):
regional_change_classes_path = p.regional_projections_input_override_paths[p.scenario_label]

# TODOO I should have iterated on class label here, then called the util file only on that one
seals_utils.convert_regional_change_to_coarse(regional_change_vector_path,
regional_change_classes_path,
@@ -98,7 +102,7 @@ def regional_change(p):
regional_coarsified_raster = hb.as_array(regional_coarsified_path)

# covariate_additive
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)


@@ -148,8 +152,11 @@ def regional_change(p):

covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))

print('WARNING! sometimes need to use *1000 here to convert from ha to m2. Check if this is desired behavior. GTAP NEEDS THIS OTHERS DONT')
def op(a, b, c):
return (a - (c-b)) * 1000.0
return (a - (c-b))
# return (a - (c-b)) * 1000
hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)

#### COVARIATE MULTIPLY SHIFT
@@ -226,15 +233,15 @@ def op(a, b, c):
covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
def op(a, b, c):
return (a * (b/c)) * 1000.0
return (a * (b/c))
hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)

# This is the one i want to use so also save it as the template. Can choose from different algorithms above.
alg_to_use_path = hb.suri(output_path_template, 'covariate_sum_shift')
hb.path_copy(alg_to_use_path, output_path_template)
# Given all of these, copy the one that we want to use to the name without a label

# 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050


else:
@@ -338,7 +345,7 @@ def regional_change_new_test(p):
regional_coarsified_raster = hb.as_array(regional_coarsified_path)

# covariate_additive
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)


@@ -389,7 +396,7 @@ def regional_change_new_test(p):
covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))
def op(a, b, c):
return (a - (c-b)) * 1000
return (a - (c-b))
hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)

#### COVARIATE MULTIPLY SHIFT
@@ -465,7 +472,7 @@ def op(a, b, c):
covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
def op(a, b, c):
return (a * (b/c)) * 1000
return (a * (b/c))
hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)

if p.region_to_coarse_algorithm == 'covariate_additive':
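To make the reconciliation arithmetic in this file easier to audit, here is a hedged numpy sketch of the covariate_additive and covariate_sum_shift formulas as they read in the diff; the array values are hypothetical, and the optional ×1000 factor reflects the unit question flagged in the WARNING print rather than a confirmed rule.

```python
import numpy as np

# Hypothetical 2x2 coarse grids of land-use change, in hectares per cell.
regional_change = np.array([[10.0, -5.0], [0.0, 2.0]])  # coarsified regional projection (op arg a)
coarse_current  = np.array([[ 4.0,  1.0], [1.0, 0.0]])  # current coarse LUC projection (op arg b)
coarse_target   = np.array([[ 6.0, -2.0], [2.0, 1.0]])  # target coarse LUC projection (op arg c)

# covariate_additive: the regional change added directly onto the coarse projection.
covariate_additive = coarse_current + regional_change

# covariate_sum_shift: a - (c - b), i.e. the regional change shifted by how much
# the coarse projection itself moves between current and target.
covariate_sum_shift = regional_change - (coarse_target - coarse_current)

# The diff drops the trailing *1000.0 from these ops; per the WARNING above,
# some inputs (e.g. GTAP) still need that rescaling, so it would be applied
# conditionally depending on the source model's units (assumption).
needs_rescale = False
if needs_rescale:
    covariate_sum_shift = covariate_sum_shift * 1000.0

print(covariate_additive)
print(covariate_sum_shift)
```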
21 changes: 16 additions & 5 deletions seals/seals_tasks.py
@@ -35,11 +35,22 @@ def project_aoi(p):
if not p.regions_column_label in gdf.columns:
raise ValueError(f"The column '{p.regions_column_label}' is not found in the regions vector file: {p.regions_vector_path}. Please check the column name or the vector file.")

p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi

filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code.
filter_value = p.aoi

if p.aoi == 'from_regional_projections_input_path':
# Read the csv to get the unique AOI values
df = hb.df_read(p.regional_projections_input_path)
unique_aois = list(df[p.regions_column_label].unique())
filter_column = p.regions_column_label
filter_value = unique_aois
p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi
else:

p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
p.aoi_label = p.aoi

filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code.
filter_value = p.aoi


for current_aoi_path in hb.list_filtered_paths_nonrecursively(p.cur_dir, include_strings='aoi'):
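A minimal sketch of the AOI-selection branch added to project_aoi, assuming a pandas/geopandas workflow; the file paths and the 'nuts_label' column name are illustrative, and the actual clipping in SEALS happens downstream via hazelbean rather than with the isin() filter shown here.

```python
import pandas as pd
import geopandas as gpd

regions_column_label = 'nuts_label'           # assumed regions column
gdf = gpd.read_file('regions.gpkg')           # hypothetical regions vector
aoi = 'from_regional_projections_input_path'  # or a single code like 'CZE'

if aoi == 'from_regional_projections_input_path':
    # Take every region that appears in the regional projections CSV.
    df = pd.read_csv('regional_projections_input.csv')  # hypothetical path
    filter_value = list(df[regions_column_label].unique())
    selected = gdf[gdf[regions_column_label].isin(filter_value)]
else:
    # Filter on a single value, e.g. an ISO3 code.
    filter_value = aoi
    selected = gdf[gdf[regions_column_label] == filter_value]

selected.to_file('aoi_' + str(aoi) + '.gpkg', driver='GPKG')
```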
6 changes: 5 additions & 1 deletion seals/seals_utils.py
@@ -1508,7 +1508,11 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan

if region_ids_raster_path is None:
region_ids_raster_path = os.path.join(output_dir, 'region_ids.tif')

merged_vector_output_path = os.path.join(os.path.split(region_ids_raster_path)[0], 'regional_change_with_ids_' + scenario_label + '.gpkg')


merged.to_file(merged_vector_output_path, driver='GPKG')

### Rasterize the regions vector to a raster.

@@ -1519,7 +1523,7 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan

    # TODOO NOTE that here we are not using all_touched. This is a fundamental problem with coarse reclassification: much of each polygon can be missed. Ideally, you would use all_touched=False for
    # country-country borders but all_touched=True for country-coastline borders. Or join with EEZs?
hb.rasterize_to_match(regional_change_vector_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)
hb.rasterize_to_match(merged_vector_output_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)

### Get the number of cells per zone.

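The TODOO note above concerns the all_touched choice when burning region polygons onto a coarse grid. Here is a small rasterio sketch of the tradeoff, with a hypothetical grid and polygon: all_touched=False only burns cells whose centers fall inside the polygon (so partially covered coastal cells are dropped), while all_touched=True burns every intersected cell (so cells along shared land borders can be claimed by both neighbors).

```python
import rasterio.features
from rasterio.transform import from_origin
from shapely.geometry import box

# Hypothetical 4x4 coarse grid with 1-degree cells, origin at (0, 4).
transform = from_origin(0.0, 4.0, 1.0, 1.0)
out_shape = (4, 4)

# A region polygon that only partially covers the cells along its edge
# (a stand-in for a coastline).
region = box(0.25, 0.25, 2.5, 3.5)
shapes = [(region, 1)]

strict = rasterio.features.rasterize(shapes, out_shape=out_shape, transform=transform,
                                     fill=0, all_touched=False, dtype='uint8')
greedy = rasterio.features.rasterize(shapes, out_shape=out_shape, transform=transform,
                                     fill=0, all_touched=True, dtype='uint8')

# strict undercounts the partially covered edge cells; greedy claims every
# touched cell, which double-counts along shared borders.
print(strict.sum(), greedy.sum())
```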