diff --git a/seals/run_seals_bonn.py b/seals/run_seals_bonn.py
new file mode 100644
index 0000000..7dbcf46
--- /dev/null
+++ b/seals/run_seals_bonn.py
@@ -0,0 +1,184 @@
+import os
+import sys
+
+import hazelbean as hb
+import pandas as pd
+
+from seals import seals_generate_base_data, seals_initialize_project, seals_main, seals_process_coarse_timeseries, seals_tasks, seals_utils, seals_visualization_tasks
+
+# TODO
+# 1. Make regional projections be the change, not the total.
+# 2. Decide how to handle project_dir in the project repo so it uses the git-cloned inputs.
+# 3. Test setup.bat and decide whether it belongs in src (though it is git-ignored).
+
+
+def convert_cgebox_output_to_seals_regional_projections_input(p):
+
+    p.regional_projections_input_override_paths = {}
+    if p.run_this:
+        for index, row in p.scenarios_df.iterrows():
+            seals_utils.assign_df_row_to_object_attributes(p, row)
+
+            if p.scenario_type != 'baseline':
+                input_path = p.regional_projections_input_path
+                output_path = os.path.join(p.cur_dir, f'regional_projections_input_pivoted_{p.exogenous_label}.csv')
+
+                # START HERE: the override doesn't work yet because it doesn't iterate over years.
+                p.regional_projections_input_override_paths[p.scenario_label] = output_path
+
+                if not hb.path_exists(output_path):
+                    df = hb.df_read(input_path)
+
+                    # Step 1: Melt the DataFrame to convert year columns into rows.
+
+                    # Get the list of columns to unpivot (years).
+                    years_to_unpivot = [col for col in df.columns if col.isdigit()]
+                    melted = df.melt(
+                        id_vars=[p.regions_column_label, 'LandCover'],  # Assumes the land cover column is named 'LandCover'.
+                        value_vars=years_to_unpivot,
+                        var_name='year',
+                        value_name='value'
+                    )
+
+                    # Step 2: Pivot the melted DataFrame.
+                    # Set the region and year as the new index, and create new columns from the 'LandCover' categories.
+                    merged_pivoted = melted.pivot_table(
+                        index=[p.regions_column_label, 'year'],
+                        columns='LandCover',
+                        values='value'
+                    ).reset_index()
+
+                    # Add a 1-based integer id per region.
+                    merged_pivoted['nuts_id'], unique_countries = pd.factorize(merged_pivoted[p.regions_column_label])
+                    merged_pivoted['nuts_id'] = merged_pivoted['nuts_id'] + 1
+
+                    # Define the columns for which the year-over-year change should be calculated.
+                    land_use_columns = ['cropland', 'forest', 'grassland', 'other', 'othernat', 'urban', 'water']
+
+                    # Sort by region and year to ensure correct chronological order.
+                    df_sorted = merged_pivoted.sort_values(by=[p.regions_column_label, 'year'])
+
+                    # Group by region and calculate the difference for the specified columns.
+                    # .diff() calculates the difference from the previous row within each group;
+                    # .fillna(0) replaces the initial NaN values with 0.
+                    df_sorted[land_use_columns] = df_sorted.groupby(p.regions_column_label)[land_use_columns].diff().fillna(0)
+
+                    # df_sorted now contains the year-over-year change. Display the first
+                    # few rows for a specific region to verify.
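+                    # Worked illustration with hypothetical values: a wide input pair
+                    #   nuts_label  LandCover  2019  2020  2021
+                    #   CZ03        cropland    810   812   815
+                    #   CZ03        forest      300   299   297
+                    # becomes, after the melt / pivot / sort / diff above,
+                    #   nuts_label  year  cropland  forest
+                    #   CZ03        2019       0.0     0.0
+                    #   CZ03        2020       2.0    -1.0
+                    #   CZ03        2021       3.0    -2.0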
+ print("Year-over-year changes for CZ03:") + print(df_sorted[df_sorted['nuts_label'] == 'CZ03'].head()) + + # multiply by 1000 because i think cgebox outputs in thousands of ha + for col in land_use_columns: + df_sorted[col] = df_sorted[col] * 1000 + # 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050 + # repeat these numbers + + # Write a new file in the task dir and reassign the project attribute to the new csv + hb.df_write(df_sorted, output_path) + + +def build_bonn_task_tree(p): + + # Define the project AOI + p.project_aoi_task = p.add_task(seals_tasks.project_aoi) + p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input) + + + ##### FINE PROCESSED INPUTS ##### + p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs) + p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False) + + ##### COARSE CHANGE ##### + p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0) + p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0) + p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0) + + ##### REGIONAL + p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change) + + ##### ALLOCATION ##### + p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0) + p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0) + p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0) + + ##### STITCH ZONES ##### + p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios) + + ##### VIZUALIZE EXISTING DATA ##### + p.visualization_task = p.add_task(seals_visualization_tasks.visualization) + p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task) + + + + +main = '' +if __name__ == '__main__': + + ### ------- ENVIRONMENT SETTINGS ------------------------------- + # Users should only need to edit lines in this section + + # Create a ProjectFlow Object to organize directories and enable parallel processing. + p = hb.ProjectFlow() + + # Assign project-level attributes to the p object (such as in p.base_data_dir = ... 
+
+
+def build_bonn_task_tree(p):
+
+    # Define the project AOI.
+    p.project_aoi_task = p.add_task(seals_tasks.project_aoi)
+    p.convert_cgebox_output_to_seals_regional_projections_input_task = p.add_task(convert_cgebox_output_to_seals_regional_projections_input)
+
+    ##### FINE PROCESSED INPUTS #####
+    p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs)
+    p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False)
+    p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False)
+    p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False)
+    p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False)
+    p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False)
+
+    ##### COARSE CHANGE #####
+    p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0)
+    p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0)
+    p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0)
+    p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0)
+    p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0)
+
+    ##### REGIONAL #####
+    p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change)
+
+    ##### ALLOCATION #####
+    p.allocations_task = p.add_iterator(seals_main.allocations, skip_existing=0)
+    p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task, skip_existing=0)
+    p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=0)
+
+    ##### STITCH ZONES #####
+    p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios)
+
+    ##### VISUALIZE EXISTING DATA #####
+    p.visualization_task = p.add_task(seals_visualization_tasks.visualization)
+    p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task)
+
+
+main = ''
+if __name__ == '__main__':
+
+    ### ------- ENVIRONMENT SETTINGS -------------------------------
+    # Users should only need to edit lines in this section.
+
+    # Create a ProjectFlow object to organize directories and enable parallel processing.
+    p = hb.ProjectFlow()
+
+    # Assign project-level attributes to the p object (such as p.base_data_dir below),
+    # including where the project_dir and base_data are located.
+    # The project_name is used to name the project directory below. If the directory
+    # exists, each task will not recreate files that already exist.
+    p.user_dir = os.path.expanduser('~')
+    p.extra_dirs = ['Files', 'seals', 'projects']
+    p.project_name = 'bonn_esm_cgebox_seals'
+    # p.project_name = p.project_name + '_' + hb.pretty_time()  # Uncomment to recreate everything in a fresh directory each run.
+
+    # Based on the paths above, set the project_dir. All files will be created in this directory.
+    p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name)
+    p.set_project_dir(p.project_dir)
+
+    p.run_in_parallel = 1  # Must be set before building the task tree if the tree has parallel iterator tasks.
+
+    # Build the task tree via a building function and assign it to p. IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION.
+    build_bonn_task_tree(p)
+
+    # Set the base data dir. The model will check here to see if it has everything it needs to run.
+    # If anything is missing, it will download it. You can use the same base_data dir across multiple projects.
+    # Additionally, if you're clever, you can move files generated in your tasks into the right base_data_dir
+    # subdirectory so that they are available for future projects, avoiding redundant processing.
+    # The final directory has to be named base_data to match the naming convention on the Google Cloud bucket.
+    p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data')
+
+    # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a
+    # different bucket than the default, provide the name and credentials here. Otherwise it uses the default public
+    # data bucket 'gtap_invest_seals_2023_04_21'.
+    p.data_credentials_path = None
+    p.input_bucket_name = None
+
+    ## Set defaults and generate the scenario_definitions.csv if it doesn't exist.
+    # SEALS will run based on the scenarios defined in a scenario_definitions.csv.
+    # If you have not run SEALS before, SEALS will generate it in your project's input_dir.
+    # A useful way to get started is to run SEALS on the test data without modification
+    # and then edit the scenario_definitions.csv to your project's needs.
+    p.scenario_definitions_filename = 'standard_scenarios.csv'
+    p.scenario_definitions_path = os.path.join(p.input_dir, p.scenario_definitions_filename)
+    seals_initialize_project.initialize_scenario_definitions(p)
+
+    # Set the processing resolution: determines how large a chunk is processed at a time. 4 degrees is about the max for 64 GB memory systems.
+    p.processing_resolution = 1.0  # In degrees. Must be in pyramid_compatible_resolutions.
+
+    seals_initialize_project.set_advanced_options(p)
+
+    p.L = hb.get_logger('test_run_seals')
+    hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir)
+
+    p.execute()
+
+    result = 'Done!'
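+
+    # Hypothetical post-run check (assumes the stitched rasters land in a
+    # p.stitched_lulc_simplified_scenarios_dir attribute, which may differ by
+    # ProjectFlow version):
+    # for path in hb.list_filtered_paths_nonrecursively(p.stitched_lulc_simplified_scenarios_dir, include_strings='.tif'):
+    #     hb.log('Wrote ' + path)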
+
diff --git a/seals/seals_process_coarse_timeseries.py b/seals/seals_process_coarse_timeseries.py
index 14a0d3d..03b0233 100644
--- a/seals/seals_process_coarse_timeseries.py
+++ b/seals/seals_process_coarse_timeseries.py
@@ -60,6 +60,10 @@ def regional_change(p):
             scenario_label = p.scenario_label
             output_filename_end = '_' + str(year) + '_' + str(previous_year) + '_ha_diff_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_regional_coarsified.tif'
             regions_column_label = p.regions_column_label
+
+            if getattr(p, 'regional_projections_input_override_paths', None):
+                regional_change_classes_path = p.regional_projections_input_override_paths[p.scenario_label]
+
             # TODOO I should have iterated on the class label here, then called the util file only on that one.
             seals_utils.convert_regional_change_to_coarse(regional_change_vector_path,
                                                           regional_change_classes_path,
@@ -98,7 +102,7 @@ def regional_change(p):
             regional_coarsified_raster = hb.as_array(regional_coarsified_path)
 
             # covariate_additive
-            covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
+            covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
 
             hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)
 
@@ -148,8 +152,11 @@ def regional_change(p):
             covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
             input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))
+
+            print('WARNING! Sometimes *1000 is needed here to convert from ha to m2. Check whether this is desired behavior: GTAP needs it; others do not.')
             def op(a, b, c):
-                return (a - (c-b)) * 1000.0
+                return (a - (c-b))
+                # return (a - (c-b)) * 1000
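+                # A possible generalization (sketch only; p.convert_regional_ha_to_m2 is a
+                # hypothetical flag that is not defined anywhere yet):
+                #     scale = 1000.0 if getattr(p, 'convert_regional_ha_to_m2', False) else 1.0
+                #     return (a - (c - b)) * scale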
             hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)
 
             #### COVARIATE MULTIPLY SHIFT
@@ -226,7 +233,7 @@ def op(a, b, c):
             covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
             input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
             def op(a, b, c):
-                return (a * (b/c)) * 1000.0
+                return (a * (b/c))
             hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)
 
             # This is the one I want to use, so also save it as the template. Can choose from the different algorithms above.
@@ -234,7 +241,7 @@ def op(a, b, c):
             hb.path_copy(alg_to_use_path, output_path_template)
 
             # Given all of these, copy the one that we want to use to the name without a label.
-
+            # cgebox reporting years: 2019 2020 2021 2023 2025 2027 2029 2030 2031 2033 2035 2037 2039 2040 2041 2043 2045 2047 2049 2050
         else:
@@ -338,7 +345,7 @@ def regional_change_new_test(p):
             regional_coarsified_raster = hb.as_array(regional_coarsified_path)
 
             # covariate_additive
-            covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster) * 1000.0
+            covariate_additive = (current_luc_coarse_projections + regional_coarsified_raster)
 
             hb.save_array_as_geotiff(covariate_additive, hb.suri(output_path_template, 'covariate_additive'), current_luc_coarse_projections_path)
 
@@ -389,7 +396,7 @@ def regional_change_new_test(p):
             covariate_sum_shift_path = hb.suri(output_path_template, 'covariate_sum_shift')
             input = ((regional_coarsified_path, 1), (current_luc_coarse_projections_path, 1), (target_raster_path, 1))
             def op(a, b, c):
-                return (a - (c-b)) * 1000
+                return (a - (c-b))
             hb.raster_calculator(input, op, covariate_sum_shift_path, 7, -9999.)
 
             #### COVARIATE MULTIPLY SHIFT
@@ -465,7 +472,7 @@ def op(a, b, c):
             covariate_multiply_shift_path = hb.suri(output_path_template, 'covariate_multiply_shift')
             input = ((current_luc_coarse_projections_path, 1), (covariate_multiply_regional_change_sum_path, 1), (covariate_multiply_regional_change_sum_path, 1))
             def op(a, b, c):
-                return (a * (b/c)) * 1000
+                return (a * (b/c))
             hb.raster_calculator(input, op, covariate_multiply_shift_path, 7, -9999.)
 
             if p.region_to_coarse_algorithm == 'covariate_additive':
diff --git a/seals/seals_tasks.py b/seals/seals_tasks.py
index e1fa90e..a47f0b0 100644
--- a/seals/seals_tasks.py
+++ b/seals/seals_tasks.py
@@ -35,11 +35,22 @@ def project_aoi(p):
         if not p.regions_column_label in gdf.columns:
             raise ValueError(f"The column '{p.regions_column_label}' is not found in the regions vector file: {p.regions_vector_path}. Please check the column name or the vector file.")
 
-        p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
-        p.aoi_label = p.aoi
-
-        filter_column = p.regions_column_label # if it's exactly 3 characters, assume it's an ISO3 code.
-        filter_value = p.aoi
+        if p.aoi == 'from_regional_projections_input_path':
+            # Read the csv to get the unique AOI values.
+            df = hb.df_read(p.regional_projections_input_path)
+            unique_aois = list(df[p.regions_column_label].unique())
+            filter_column = p.regions_column_label
+            filter_value = unique_aois
+            p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
+            p.aoi_label = p.aoi
+        else:
+            p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg')
+            p.aoi_label = p.aoi
+
+            filter_column = p.regions_column_label  # If it's exactly 3 characters, assume it's an ISO3 code.
+            filter_value = p.aoi
 
         for current_aoi_path in hb.list_filtered_paths_nonrecursively(p.cur_dir, include_strings='aoi'):
diff --git a/seals/seals_utils.py b/seals/seals_utils.py
index 44ca65b..1e8c1d4 100644
--- a/seals/seals_utils.py
+++ b/seals/seals_utils.py
@@ -1508,7 +1508,11 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan
     if region_ids_raster_path is None:
         region_ids_raster_path = os.path.join(output_dir, 'region_ids.tif')
 
+    merged_vector_output_path = os.path.join(os.path.split(region_ids_raster_path)[0], 'regional_change_with_ids_' + scenario_label + '.gpkg')
+
+    merged.to_file(merged_vector_output_path, driver='GPKG')
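+    # Persisting the merged vector means the rasterization below burns the region
+    # id column that was just joined onto the regional change polygons, rather than
+    # relying on an id column in the original input vector.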
 
     ### Rasterize the regions vector to a raster.
@@ -1519,7 +1523,7 @@ def convert_regional_change_to_coarse(regional_change_vector_path, regional_chan
 
     # TODOO NOTE that we are not using all_touched here. This is a fundamental problem with coarse reclassification: much of the polygon will be missed. Ideally, you would use all_touched=False for
     # country-country borders but all_touched=True for country-coastline borders. Or join with EEZs?
-    hb.rasterize_to_match(regional_change_vector_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)
+    hb.rasterize_to_match(merged_vector_output_path, coarse_ha_per_cell_path, region_ids_raster_path, burn_column_name=regions_column_id, burn_values=None, datatype=13, ndv=0, all_touched=False)
 
     ### Get the number of cells per zone.