From ca4b204885ee789680e431c56fdcdf0ecea2dda8 Mon Sep 17 00:00:00 2001 From: FarmVibes Release Pipeline Date: Thu, 4 Jul 2024 14:12:44 +0000 Subject: [PATCH 01/13] Add components needed to build docker images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alex Crown Co-authored-by: Bruno Silva Co-authored-by: Eduardo Rodrigues Co-authored-by: Jessica Wolk Co-authored-by: Leonardo Nunes Co-authored-by: Naga Bilwanth Gangarapu Co-authored-by: Rafael Padilha Co-authored-by: Renato Luiz de Freitas Cunha Co-authored-by: Rick Gutierrez Co-authored-by: Roberto de Moura Estevão Filho Co-authored-by: Roberto Santos Co-authored-by: Sara Malvar --- .devcontainer/devcontainer.json | 62 + .devcontainer/post-create.sh | 68 ++ .gitattributes | 4 + .gitignore | 5 +- .ruff.toml | 17 + Makefile | 199 ++++ .../average_model/pixel_average_model.onnx | 3 + .../cdl_metadata/CDL_codes_names_colors.xls | 3 + op_resources/cdl_metadata/us_continental.wkt | 1 + op_resources/cloud_models/NOTICE.md | 8 + .../cloud_models/cloud_model1_cpu.onnx | 3 + .../cloud_models/cloud_model2_cpu.onnx | 3 + .../cloud_models/cloud_model3_cpu.onnx | 3 + .../cloud_models/cloud_model4_cpu.onnx | 3 + .../cloud_models/cloud_model5_cpu.onnx | 3 + .../terraces_grassed_waterways.onnx | 3 + op_resources/driveways_models/driveway.onnx | 3 + .../glad_tile_geometry/10d_tiles.geojson | 510 ++++++++ op_resources/glad_tile_geometry/NOTICE.md | 9 + op_resources/sentinel_tile_geometry/NOTICE.md | 7 + ...7_V20150622T000000_21000101T000000_B00.kml | 3 + op_resources/shadow_models/NOTICE.md | 12 + op_resources/shadow_models/shadow.onnx | 3 + op_resources/spaceeye_models/spaceeye.onnx | 3 + .../spectral_extension.onnx | 3 + ops/admag/admag_seasonal_field.yaml | 17 + ops/admag/admag_seasonal_field_op.py | 314 +++++ ops/admag/get_prescription.py | 43 + ops/admag/get_prescription.yaml | 17 + ops/admag/list_prescriptions.py | 94 ++ ops/admag/list_prescriptions.yaml | 16 + 
ops/admag/prescriptions.py | 107 ++ ops/admag/prescriptions.yaml | 18 + ops/admag/test_admag.py | 1010 ++++++++++++++++ .../aggregate_statistics_timeseries.yaml | 15 + .../aggregate_timeseries.py | 51 + ops/carbon_local/test_whatif.py | 298 +++++ ops/carbon_local/whatif_comet_local.py | 245 ++++ ops/carbon_local/whatif_comet_local_op.yaml | 18 + ops/chunk_raster/chunk_raster.py | 152 +++ ops/chunk_raster/chunk_raster.yaml | 17 + ops/chunk_raster/chunk_sequence_raster.yaml | 17 + ops/clip_raster/clip_raster.py | 66 ++ ops/clip_raster/clip_raster.yaml | 20 + ops/combine_chunks/combine_chunks.py | 112 ++ ops/combine_chunks/combine_chunks.yaml | 12 + ops/compute_cloud_prob/compute_cloud_prob.py | 117 ++ .../compute_cloud_prob.yaml | 25 + .../compute_cloud_water_mask.py | 63 + .../compute_cloud_water_mask.yaml | 23 + .../compute_conservation_practice.py | 109 ++ .../compute_conservation_practice.yaml | 30 + .../compute_evaporative_fraction.py | 158 +++ .../compute_evaporative_fraction.yaml | 25 + ops/compute_fcover/compute_fcover.yaml | 10 + ops/compute_fcover/fcover.py | 225 ++++ ops/compute_ghg_fluxes/compute_ghg_fluxes.py | 1043 +++++++++++++++++ .../compute_ghg_fluxes.yaml | 15 + ops/compute_ghg_fluxes/test_ghg_fluxes.py | 106 ++ .../compute_illuminance.py | 45 + .../compute_illuminance.yaml | 11 + ops/compute_index/compute_index.yaml | 15 + ops/compute_index/index.py | 213 ++++ ops/compute_index/test_index.py | 156 +++ .../compute_irrigation_probability.py | 113 ++ .../compute_irrigation_probability.yaml | 26 + .../compute_ngi_egi_layers.py | 102 ++ .../compute_ngi_egi_layers.yaml | 17 + ops/compute_onnx/compute_onnx.py | 105 ++ ops/compute_onnx/compute_onnx.yaml | 26 + .../compute_onnx_from_chunks.yaml | 25 + .../compute_onnx_from_sequence.yaml | 28 + ops/compute_onnx/test_compute_onnx.py | 154 +++ ops/compute_onnx/test_compute_onnx_chunk.py | 155 +++ .../compute_pixel_count.py | 69 ++ .../compute_pixel_count.yaml | 18 + .../test_compute_pixel_count.py | 73 ++ 
.../compute_raster_class_windowed_average.py | 149 +++ ...compute_raster_class_windowed_average.yaml | 17 + .../compute_raster_cluster.py | 80 ++ .../compute_raster_cluster.yaml | 29 + .../compute_raster_gradient.py | 82 ++ .../compute_raster_gradient.yaml | 11 + .../compute_shadow_prob.py | 119 ++ .../compute_shadow_prob.yaml | 25 + .../create_raster_sequence.py | 53 + .../create_raster_sequence.yaml | 18 + ...te_raster_sequence_from_sequence_list.yaml | 18 + ops/datavibe_filter/datavibe_filter.py | 38 + ops/datavibe_filter/datavibe_filter.yaml | 12 + ops/detect_driveway/detect_driveway.py | 199 ++++ ops/detect_driveway/detect_driveway.yaml | 29 + ops/detect_outliers/detect_outliers.py | 230 ++++ ops/detect_outliers/detect_outliers.yaml | 16 + ops/download_airbus/download_airbus.py | 92 ++ ops/download_airbus/download_airbus.yaml | 16 + ops/download_alos/download_alos.py | 40 + ops/download_alos/download_alos.yaml | 20 + ops/download_alos/test_download_alos.py | 81 ++ .../download_ambient_weather.py | 236 ++++ .../download_ambient_weather.yaml | 21 + .../download_bing_basemap.py | 81 ++ .../download_bing_basemap.yaml | 22 + .../test_download_bing_basemap.py | 66 ++ ops/download_cdl_data/download_cdl.py | 86 ++ ops/download_cdl_data/download_cdl.yaml | 12 + ops/download_cdl_data/download_cdl_data.py | 126 ++ ops/download_cdl_data/download_cdl_data.yaml | 10 + ops/download_chirps/download_chirps.py | 44 + ops/download_chirps/download_chirps.yaml | 11 + .../download_climatology_lab.py | 39 + .../download_climatology_lab.yaml | 16 + .../test_download_climatology_lab.py | 40 + ops/download_dem/download_dem.py | 68 ++ ops/download_dem/download_dem.yaml | 12 + ops/download_dem/test_download_dem.py | 51 + ops/download_era5/download_era5.py | 72 ++ ops/download_era5/download_era5.yaml | 12 + .../download_esri_landuse_landcover.py | 51 + .../download_esri_landuse_landcover.yaml | 12 + .../test_download_esri_landuse_landcover.py | 52 + 
ops/download_from_ref/download_from_ref.py | 86 ++ .../download_geometry_from_ref.yaml | 12 + .../download_raster_from_ref.yaml | 12 + .../download_rasters_from_smb.py | 144 +++ .../download_rasters_from_smb.yaml | 33 + .../download_gedi_product.py | 39 + .../download_gedi_product.yaml | 12 + .../test_download_gedi_product.py | 73 ++ ops/download_glad_data/download_glad.py | 37 + ops/download_glad_data/download_glad.yaml | 11 + .../test_download_glad_product.py | 38 + ops/download_gnatsgo/download_gnatsgo.yaml | 60 + .../download_gnatsgo_raster.py | 68 ++ ops/download_gnatsgo/test_download_gnatsgo.py | 71 ++ ops/download_hansen/download_hansen.py | 42 + ops/download_hansen/download_hansen.yaml | 21 + .../test_download_hansen_product.py | 48 + ops/download_herbie/download_herbie.py | 48 + ops/download_herbie/download_herbie.yaml | 12 + ops/download_herbie/forecast_range_split.py | 43 + ops/download_herbie/forecast_range_split.yaml | 18 + ops/download_herbie/forecast_weather.py | 120 ++ ops/download_herbie/forecast_weather.yaml | 23 + .../download_landsat_from_pc.yaml | 12 + .../download_landsat_pc.py | 48 + ops/download_modis_sr/download_modis_sr.py | 79 ++ ops/download_modis_sr/download_modis_sr.yaml | 14 + .../download_modis_vegetation.py | 49 + .../download_modis_vegetation.yaml | 16 + .../test_download_modis_vegetation.py | 76 ++ ops/download_naip/download_naip.py | 52 + ops/download_naip/download_naip.yaml | 12 + ops/download_naip/test_download_naip.py | 49 + .../download_road_geometries.py | 62 + .../download_road_geometries.yaml | 17 + .../download_sentinel1.yaml | 27 + .../download_sentinel1_rtc.py | 76 ++ .../test_download_sentinel1_rtc.py | 91 ++ .../download_sentinel1_grd.py | 88 ++ .../download_sentinel1_grd.yaml | 12 + .../test_download_sentinel1.py | 87 ++ .../download_s2_pc.py | 76 ++ .../download_sentinel2_from_pc.yaml | 12 + ops/download_soilgrids/download_soilgrids.py | 127 ++ .../download_soilgrids.yaml | 17 + .../download_stack_s2.py | 190 +++ 
.../download_stack_sentinel2.yaml | 32 + .../download_usda_soils.py | 68 ++ .../download_usda_soils.yaml | 18 + .../ensemble_cloud_prob.py | 31 + .../ensemble_cloud_prob.yaml | 16 + ops/estimate_canopy_cover/estimate_canopy.py | 70 ++ .../estimate_canopy_cover.yaml | 13 + ops/extract_gedi_rh100/extract_gedi_rh100.py | 104 ++ .../extract_gedi_rh100.yaml | 14 + .../test_extract_gedi_rh100.py | 91 ++ .../extract_protein_sequence.py | 48 + .../extract_protein_sequence.yaml | 9 + ops/get_angles/get_angles.py | 275 +++++ ops/get_angles/get_angles.yaml | 10 + ops/gfs_download/gfs_download.py | 81 ++ ops/gfs_download/gfs_download.yaml | 12 + ops/gfs_preprocess/gfs_preprocess.py | 110 ++ ops/gfs_preprocess/gfs_preprocess.yaml | 14 + .../group_rasters_by_geometries.py | 41 + .../group_rasters_by_geometries.yaml | 16 + .../group_rasters_by_time.py | 36 + .../group_rasters_by_time.yaml | 17 + .../test_group_rasters_by_time.py | 47 + .../group_sentinel1_orbits.py | 43 + .../group_sentinel1_orbits.yaml | 13 + .../group_sentinel2_orbits.py | 66 ++ .../group_sentinel2_orbits.yaml | 15 + .../group_s1_tile_sequence.yaml | 19 + .../group_s2_tile_sequence.yaml | 19 + .../group_s2cloudmask_tile_sequence.yaml | 19 + .../group_tile_sequence.py | 205 ++++ ...oil_sample_heatmap_using_classification.py | 192 +++ ...l_sample_heatmap_using_classification.yaml | 32 + .../soil_sample_heatmap_using_neighbors.py | 216 ++++ .../soil_sample_heatmap_using_neighbors.yaml | 27 + ..._cluster_sample_heatmap_using_neighbors.py | 206 ++++ ...oil_sample_heatmap_using_classification.py | 262 +++++ ops/helloworld/helloworld.py | 118 ++ ops/helloworld/helloworld.yaml | 14 + ops/linear_trend/linear_trend.py | 117 ++ ops/linear_trend/linear_trend.yaml | 12 + ops/linear_trend/test_linear_trend.py | 32 + ops/list_airbus_products/list_airbus.py | 57 + .../list_airbus_products.yaml | 14 + ops/list_alos_products/list_alos_products.py | 42 + .../list_alos_products.yaml | 11 + ops/list_alos_products/test_alos_list.py 
| 92 ++ ops/list_bing_maps/list_bing_maps.py | 71 ++ ops/list_bing_maps/list_bing_maps.yaml | 44 + ops/list_bing_maps/test_list_bing_maps.py | 145 +++ ops/list_cdl_products/list_cdl_products.py | 65 + ops/list_cdl_products/list_cdl_products.yaml | 13 + ops/list_chirps/list_chirps.py | 214 ++++ ops/list_chirps/list_chirps.yaml | 23 + .../list_climatology_lab.py | 61 + ops/list_climatology_lab/list_gridmet.yaml | 39 + .../list_terraclimate.yaml | 37 + .../test_list_climatology_lab.py | 64 + ops/list_dem_products/list_dem_products.py | 53 + ops/list_dem_products/list_dem_products.yaml | 17 + .../test_list_dem_products.py | 36 + ops/list_era5/list_era5.py | 67 ++ ops/list_era5/list_era5.yaml | 32 + ops/list_era5/list_era5_cds.py | 63 + ops/list_era5/list_era5_cds.yaml | 29 + .../list_esri_landuse_landcover.py | 41 + .../list_esri_landuse_landcover.yaml | 11 + .../test_list_esri_landuse_landcover.py | 33 + ops/list_gedi_products/list_gedi_products.py | 69 ++ .../list_gedi_products.yaml | 15 + ops/list_gedi_products/mock_items.json | 1 + .../test_list_gedi_products.py | 45 + ops/list_glad_products/list_glad_products.py | 42 + .../list_glad_products.yaml | 18 + ops/list_glad_products/test_glad_list.py | 169 +++ .../list_gnatsgo_products.py | 34 + .../list_gnatsgo_products.yaml | 12 + .../test_list_gnatsgo_products.py | 61 + .../list_hansen_products.py | 97 ++ .../list_hansen_products.yaml | 40 + ops/list_hansen_products/test_hansen_list.py | 143 +++ ops/list_herbie/list_herbie.py | 98 ++ ops/list_herbie/list_herbie.yaml | 52 + .../list_landsat_pc.py | 38 + .../list_landsat_products_pc.yaml | 11 + ops/list_modis_sr/list_modis_sr.py | 34 + ops/list_modis_sr/list_modis_sr.yaml | 17 + .../list_modis_vegetation.py | 34 + .../list_modis_vegetation.yaml | 15 + .../test_list_modis_vegetation.py | 73 ++ ops/list_naip_products/list_naip_products.py | 45 + .../list_naip_products.yaml | 11 + .../test_list_naip_products.py | 30 + .../list_sentinel1_products_pc.py | 43 + 
.../list_sentinel1_products_pc.yaml | 26 + .../sample_pc_output.json | 1 + .../test_list_sentinel1.py | 99 ++ ops/list_sentinel2_products/list_s2_pc.py | 27 + .../list_sentinel2_products_pc.yaml | 12 + ops/list_to_sequence/list_to_sequence.py | 39 + ops/list_to_sequence/list_to_sequence.yaml | 15 + ops/list_to_sequence/test_list_to_sequence.py | 107 ++ .../match_raster_to_ref.py | 42 + .../match_raster_to_ref.yaml | 21 + ops/merge_cloud_masks/merge_cloud_masks.py | 402 +++++++ ops/merge_cloud_masks/merge_cloud_masks.yaml | 23 + .../merge_cloud_masks_simple.py | 125 ++ .../merge_cloud_masks_simple.yaml | 23 + ops/merge_geometries/merge_geometries.py | 45 + ops/merge_geometries/merge_geometries.yaml | 25 + ops/merge_geometries/test_merge_geometries.py | 38 + .../merge_geometry_and_time_range.py | 18 + .../merge_geometry_and_time_range.yaml | 19 + .../test_merge_geometry_and_time_range.py | 40 + ops/merge_rasters/merge_rasters.py | 124 ++ ops/merge_rasters/merge_rasters.yaml | 27 + ops/merge_sentinel1_orbits/merge_sentinel1.py | 79 ++ .../merge_sentinel1_orbits.yaml | 26 + .../merge_sentinel2_orbits.py | 72 ++ .../merge_sentinel2_orbits.yaml | 13 + .../find_soil_sample_locations.py | 124 ++ .../find_soil_sample_locations.yaml | 20 + .../test_soil_sample_heatmap.py | 108 ++ ops/ordinal_trend_test/ordinal_trend_test.py | 109 ++ .../ordinal_trend_test.yaml | 24 + ops/ordinal_trend_test/test_ordinal_trend.py | 98 ++ .../pair_intersecting_rasters.py | 27 + .../pair_intersecting_rasters.yaml | 13 + ops/price_airbus_products/price_airbus.py | 60 + .../price_airbus_products.yaml | 16 + ops/protlearn/protlearn.py | 225 ++++ ops/protlearn/protlearn.yaml | 10 + ops/read_grib_forecast/read_grib_forecast.py | 78 ++ .../read_grib_forecast.yaml | 12 + ops/recode_raster/recode_raster.py | 37 + ops/recode_raster/recode_raster.yaml | 30 + ops/recode_raster/test_recode_raster.py | 105 ++ ops/remove_clouds/remove_clouds.py | 279 +++++ ops/remove_clouds/remove_clouds.yaml | 28 + 
.../remove_clouds_interpolation.yaml | 34 + ops/remove_clouds/test_remove_clouds.py | 48 + .../automatic_segmentation.yaml | 56 + ops/segment_anything/prompt_segmentation.yaml | 36 + ops/segment_anything/sam_inference.py | 561 +++++++++ ops/segment_anything/test_sam_inference.py | 471 ++++++++ .../combine_sam_masks.py | 161 +++ .../combine_sam_masks.yaml | 23 + .../test_combine_sam_masks.py | 135 +++ ops/segment_driveway/segment_driveway.py | 142 +++ ops/segment_driveway/segment_driveway.yaml | 25 + .../filter_items.py | 100 ++ .../select_necessary_coverage_items.yaml | 45 + .../test_filter.py | 49 + ops/select_sequence/select_sequence.py | 54 + ops/select_sequence/select_sequence.yaml | 21 + .../select_sequence_from_list.yaml | 26 + ops/split_sequence/split_sequence.py | 25 + .../split_spaceeye_sequence.yaml | 11 + ops/split_sequence/test_split_sequence.py | 36 + ops/stack_landsat/stack_landsat.py | 86 ++ ops/stack_landsat/stack_landsat.yaml | 15 + .../stack_sentinel2_bands.py | 169 +++ .../stack_sentinel2_bands.yaml | 14 + ops/summarize_raster/raster_summary.py | 66 ++ .../summarize_masked_raster.yaml | 15 + ops/summarize_raster/summarize_raster.yaml | 13 + ops/threshold_raster/threshold_raster.py | 35 + ops/threshold_raster/threshold_raster.yaml | 15 + ops/tile_sentinel1/tile_sentinel1.py | 91 ++ ops/tile_sentinel1/tile_sentinel1.yaml | 23 + ops/tile_sentinel1/tile_sentinel1_rtc.yaml | 23 + ops/unpack_refs/unpack_refs.py | 18 + ops/unpack_refs/unpack_refs.yaml | 11 + ops/weed_detection/weed_detection.py | 220 ++++ ops/weed_detection/weed_detection.yaml | 34 + pyrightconfig.json | 11 + pytest.ini | 4 + resources/docker/Dockerfile-api_orchestrator | 18 + resources/docker/Dockerfile-cache | 18 + resources/docker/Dockerfile-dev | 17 + resources/docker/Dockerfile-devcontainer | 40 + resources/docker/Dockerfile-services-base | 34 + resources/docker/Dockerfile-worker | 23 + resources/docker/Dockerfile-worker-base | 59 + resources/docker/docker-in-docker-install.sh | 93 
++ .../generate_datatype_hierarchy_diagram.py | 81 ++ .../generate_notebook_list.py | 160 +++ .../generate_workflow_list.py | 158 +++ .../templates/datatype_hierarchy_template.md | 6 + .../templates/list_notebook_template.md | 37 + .../templates/list_workflow_template.md | 23 + .../templates/workflow_yaml_template.md | 51 + resources/envs/dev.yaml | 10 + resources/envs/rest-api_orchestrator.yml | 8 + resources/envs/services-requirements.txt | 15 + resources/envs/worker-requirements.txt | 68 ++ resources/envs/worker.yml | 11 + scripts/setup_python_develop_env.sh | 15 + src/tests/__init__.py | 0 src/tests/benchmark/test_spaceeye_ops.py | 71 ++ src/tests/conftest.py | 13 + src/tests/test_notebooks.py | 39 + src/tests/test_op_workflows_integration.py | 312 +++++ src/tests/test_ops_building.py | 36 + src/tests/test_rest_api.py | 329 ++++++ src/tests/test_rest_api_client_integration.py | 241 ++++ src/tests/test_subprocess_client.py | 77 ++ src/tests/workflows_integration/__init__.py | 0 .../test_helloworld_integration.py | 113 ++ src/tests_local_cluster/expected.tif | 3 + .../test_cluster_integration.py | 190 +++ src/vibe_agent/setup.py | 37 + src/vibe_agent/tests/conftest.py | 83 ++ .../ops/test_dependencies_integration.py | 56 + .../tests/ops/test_op_cache_builder.py | 182 +++ src/vibe_agent/tests/ops/test_op_parser.py | 51 + src/vibe_agent/tests/ops/test_operation.py | 110 ++ .../tests/test_cache_metadata_store.py | 306 +++++ src/vibe_agent/tests/test_eywa_asset.py | 38 + .../tests/test_local_asset_manager.py | 110 ++ src/vibe_agent/tests/test_storage.py | 97 ++ src/vibe_agent/tests/test_uri_handling.py | 40 + src/vibe_agent/vibe_agent/__init__.py | 0 src/vibe_agent/vibe_agent/agent_config.py | 93 ++ src/vibe_agent/vibe_agent/cache.py | 240 ++++ .../vibe_agent/cache_metadata_store.py | 255 ++++ .../vibe_agent/cache_metadata_store_client.py | 38 + src/vibe_agent/vibe_agent/data_ops.py | 359 ++++++ src/vibe_agent/vibe_agent/launch_cache.py | 34 + 
src/vibe_agent/vibe_agent/launch_data_ops.py | 54 + src/vibe_agent/vibe_agent/launch_worker.py | 43 + src/vibe_agent/vibe_agent/ops.py | 237 ++++ src/vibe_agent/vibe_agent/ops_helper.py | 14 + src/vibe_agent/vibe_agent/storage/__init__.py | 4 + .../vibe_agent/storage/asset_management.py | 294 +++++ .../vibe_agent/storage/file_upload.py | 24 + .../vibe_agent/storage/local_storage.py | 193 +++ .../vibe_agent/storage/remote_storage.py | 298 +++++ src/vibe_agent/vibe_agent/storage/storage.py | 112 ++ src/vibe_agent/vibe_agent/worker.py | 524 +++++++++ src/vibe_common/setup.py | 38 + src/vibe_common/tests/conftest.py | 18 + src/vibe_common/tests/test_input_handlers.py | 97 ++ src/vibe_common/tests/test_messaging.py | 153 +++ src/vibe_common/tests/test_statestore.py | 21 + .../tests/test_vibe_dapr_client.py | 63 + src/vibe_common/vibe_common/__init__.py | 0 src/vibe_common/vibe_common/constants.py | 83 ++ src/vibe_common/vibe_common/dapr.py | 123 ++ src/vibe_common/vibe_common/dropdapr.py | 202 ++++ src/vibe_common/vibe_common/input_handlers.py | 61 + src/vibe_common/vibe_common/messaging.py | 648 ++++++++++ src/vibe_common/vibe_common/schemas.py | 224 ++++ .../vibe_common/secret_provider.py | 186 +++ src/vibe_common/vibe_common/statestore.py | 121 ++ src/vibe_common/vibe_common/telemetry.py | 83 ++ src/vibe_common/vibe_common/tokens.py | 234 ++++ .../vibe_common/vibe_dapr_client.py | 157 +++ .../vibe_common/workflow/__init__.py | 0 src/vibe_core/pyproject.toml | 11 +- src/vibe_core/tests/test_stac_converter.py | 127 ++ .../tests/test_type_serialization.py | 98 ++ src/vibe_core/vibe_core/cli/constants.py | 2 +- src/vibe_core/vibe_core/cli/local.py | 25 +- src/vibe_core/vibe_core/cli/remote.py | 7 +- src/vibe_core/vibe_core/cli/wrappers.py | 48 +- src/vibe_core/vibe_core/client.py | 2 +- src/vibe_core/vibe_core/data/utils.py | 41 +- .../terraform/aks/modules/infra/storage.tf | 2 +- .../terraform/aks/modules/kubernetes/otel.tf | 194 +++ 
.../local/modules/kubernetes/jaeger.tf | 138 +++ .../local/modules/kubernetes/otel.tf | 128 ++ .../vibe_core/terraform/services/cache.tf | 6 +- .../vibe_core/terraform/services/dataops.tf | 4 +- .../terraform/services/orchestrator.tf | 6 +- .../vibe_core/terraform/services/restapi.tf | 4 +- .../vibe_core/terraform/services/worker.tf | 6 +- src/vibe_dev/setup.py | 31 + src/vibe_dev/vibe_dev/__init__.py | 0 src/vibe_dev/vibe_dev/client/__init__.py | 5 + src/vibe_dev/vibe_dev/client/remote_client.py | 7 + .../vibe_dev/client/subprocess_client.py | 119 ++ src/vibe_dev/vibe_dev/local_runner.py | 75 ++ src/vibe_dev/vibe_dev/mock_utils.py | 18 + src/vibe_dev/vibe_dev/testing/__init__.py | 6 + .../testing/fake_ops/fake/base_base.yaml | 9 + .../vibe_dev/testing/fake_ops/fake/base_op.py | 14 + .../fake_ops/fake/item_inheritance.yaml | 9 + .../testing/fake_ops/fake/item_item.yaml | 9 + .../testing/fake_ops/fake/item_list.yaml | 10 + .../fake_ops/fake/list_and_item_inputs.yaml | 10 + .../fake_ops/fake/list_inheritance.yaml | 9 + .../testing/fake_ops/fake/list_item.yaml | 9 + .../testing/fake_ops/fake/list_list.yaml | 9 + .../fake_ops/fake/missing_inheritance.yaml | 9 + .../fake_ops/fake/nested_parameters.yaml | 19 + .../vibe_dev/testing/fake_ops/fake/op.py | 31 + .../testing/fake_ops/fake/raster.yaml | 9 + .../testing/fake_ops/fake/raster_list.yaml | 9 + .../fake_ops/fake/simple_parameter.yaml | 11 + .../testing/fake_ops/fake/str_list.yaml | 13 + .../testing/fake_ops/fake/timeseries.yaml | 9 + .../testing/fake_ops/fake/to_item_op.py | 11 + .../testing/fake_ops/fake/to_list_op.py | 17 + .../vibe_dev/testing/fake_ops/fake/vibe_op.py | 13 + .../testing/fake_workflows/bad_sink.yaml | 11 + .../testing/fake_workflows/bad_source.yaml | 11 + .../testing/fake_workflows/base_base.yaml | 16 + .../custom_indices_structure.yaml | 32 + .../fake_workflows/fan_out_and_in.yaml | 31 + .../fake_workflows/gather_and_parallel.yaml | 36 + ...ther_and_parallel_input_gather_output.yaml | 36 + 
.../fake_workflows/incompatible_source.yaml | 15 + .../testing/fake_workflows/inheritance.yaml | 28 + .../inheritance_after_fan_out.yaml | 23 + .../inheritance_before_fan_out.yaml | 23 + .../inheritance_from_source.yaml | 22 + .../testing/fake_workflows/item_gather.yaml | 17 + .../testing/fake_workflows/item_item.yaml | 10 + .../testing/fake_workflows/list_list.yaml | 10 + .../testing/fake_workflows/missing_edge.yaml | 21 + .../fake_workflows/missing_inheritance.yaml | 17 + .../fake_workflows/nested_fan_out.yaml | 37 + .../fake_workflows/nested_task_params.yaml | 14 + .../fake_workflows/nested_workflow.yaml | 22 + .../fake_workflows/resolve_nested_params.yaml | 23 + .../resolve_nested_params_default.yaml | 24 + ...esolve_nested_params_multiple_default.yaml | 23 + .../fake_workflows/resolve_params.yaml | 25 + .../fake_workflows/single_and_parallel.yaml | 36 + .../source_and_destination.yaml | 19 + .../fake_workflows/specific_source.yaml | 15 + .../specific_source_item_list.yaml | 15 + .../specific_source_list_list.yaml | 15 + .../testing/fake_workflows/str_input.yaml | 11 + .../testing/fake_workflows/task_params.yaml | 13 + .../testing/fake_workflows/three_ops.yaml | 21 + .../fake_workflows/two_level_inheritance.yaml | 24 + .../fake_workflows/unknown_task_params.yaml | 13 + .../fake_workflows/workflow_inception.yaml | 15 + .../testing/fake_workflows_fixtures.py | 35 + src/vibe_dev/vibe_dev/testing/op_tester.py | 213 ++++ .../vibe_dev/testing/storage_fixtures.py | 81 ++ src/vibe_dev/vibe_dev/testing/utils.py | 53 + .../vibe_dev/testing/workflow_fixtures.py | 149 +++ src/vibe_lib/setup.py | 14 + src/vibe_lib/tests/test_airbus_api.py | 52 + src/vibe_lib/tests/test_earthdata.py | 113 ++ src/vibe_lib/tests/test_predict_chips.py | 91 ++ src/vibe_lib/tests/test_raster_chipping.py | 117 ++ src/vibe_lib/vibe_lib/__init__.py | 0 src/vibe_lib/vibe_lib/airbus.py | 234 ++++ src/vibe_lib/vibe_lib/archive.py | 28 + src/vibe_lib/vibe_lib/bing_maps.py | 229 ++++ 
src/vibe_lib/vibe_lib/climatology_lab.py | 93 ++ src/vibe_lib/vibe_lib/comet_farm/__init__.py | 0 .../vibe_lib/comet_farm/comet_model.py | 157 +++ .../vibe_lib/comet_farm/comet_requester.py | 58 + .../vibe_lib/comet_farm/comet_server.py | 119 ++ src/vibe_lib/vibe_lib/deepmc/encoder.py | 71 ++ src/vibe_lib/vibe_lib/deepmc/helpers.py | 51 + .../vibe_lib/deepmc/locally_connected.py | 58 + src/vibe_lib/vibe_lib/deepmc/models.py | 126 ++ src/vibe_lib/vibe_lib/deepmc/time.py | 25 + src/vibe_lib/vibe_lib/deepmc/transform.py | 43 + src/vibe_lib/vibe_lib/earthdata.py | 110 ++ src/vibe_lib/vibe_lib/gaussian_mixture.py | 85 ++ src/vibe_lib/vibe_lib/geometry.py | 98 ++ src/vibe_lib/vibe_lib/gfs_blob_utils.py | 21 + src/vibe_lib/vibe_lib/glad.py | 51 + src/vibe_lib/vibe_lib/heatmap_neighbor.py | 89 ++ src/vibe_lib/vibe_lib/overlap_clustering.py | 158 +++ src/vibe_lib/vibe_lib/planetary_computer.py | 506 ++++++++ src/vibe_lib/vibe_lib/raster.py | 697 +++++++++++ src/vibe_lib/vibe_lib/segment_anything.py | 646 ++++++++++ src/vibe_lib/vibe_lib/shapefile.py | 48 + src/vibe_lib/vibe_lib/spaceeye/__init__.py | 0 src/vibe_lib/vibe_lib/spaceeye/chip.py | 428 +++++++ src/vibe_lib/vibe_lib/spaceeye/dataset.py | 501 ++++++++ .../vibe_lib/spaceeye/illumination.py | 111 ++ .../vibe_lib/spaceeye/interpolation.py | 100 ++ src/vibe_lib/vibe_lib/spaceeye/utils.py | 39 + src/vibe_lib/vibe_lib/stats.py | 56 + src/vibe_lib/vibe_lib/timeseries.py | 17 + src/vibe_notebook/setup.py | 2 +- src/vibe_server/setup.py | 37 + src/vibe_server/tests/conftest.py | 74 ++ src/vibe_server/tests/test_graph.py | 152 +++ src/vibe_server/tests/test_href_handler.py | 99 ++ src/vibe_server/tests/test_op_parallelism.py | 94 ++ src/vibe_server/tests/test_orchestrator.py | 430 +++++++ .../tests/test_parameter_resolver.py | 125 ++ .../tests/test_remote_workflow_runner.py | 239 ++++ src/vibe_server/tests/test_workflow.py | 304 +++++ .../tests/test_workflow_input_handler.py | 251 ++++ 
src/vibe_server/tests/test_workflow_parser.py | 89 ++ src/vibe_server/tests/test_workflow_runner.py | 62 + .../tests/test_workflow_spec_validator.py | 45 + src/vibe_server/tests/test_workflow_state.py | 417 +++++++ src/vibe_server/vibe_server/__init__.py | 0 src/vibe_server/vibe_server/href_handler.py | 71 ++ src/vibe_server/vibe_server/orchestrator.py | 843 +++++++++++++ src/vibe_server/vibe_server/server.py | 900 ++++++++++++++ src/vibe_server/vibe_server/sniffer.py | 113 ++ .../vibe_server/workflow/__init__.py | 42 + .../workflow/description_validator.py | 91 ++ src/vibe_server/vibe_server/workflow/graph.py | 141 +++ .../vibe_server/workflow/input_handler.py | 177 +++ .../vibe_server/workflow/parameter.py | 132 +++ .../vibe_server/workflow/runner/__init__.py | 10 + .../workflow/runner/remote_runner.py | 242 ++++ .../vibe_server/workflow/runner/runner.py | 325 +++++ .../workflow/runner/task_io_handler.py | 132 +++ .../vibe_server/workflow/spec_parser.py | 362 ++++++ .../vibe_server/workflow/spec_validator.py | 182 +++ .../vibe_server/workflow/workflow.py | 634 ++++++++++ .../admag/admag_seasonal_field.yaml | 53 + .../data_ingestion/admag/prescriptions.yaml | 75 ++ .../airbus/airbus_download.yaml | 33 + .../data_ingestion/airbus/airbus_price.yaml | 34 + .../alos/alos_forest_extent_download.yaml | 29 + .../alos_forest_extent_download_merge.yaml | 41 + .../data_ingestion/bing/basemap_download.yaml | 35 + .../bing/basemap_download_merge.yaml | 41 + .../data_ingestion/cdl/download_cdl.yaml | 26 + .../data_ingestion/dem/download_dem.yaml | 39 + .../data_ingestion/gedi/download_gedi.yaml | 35 + .../gedi/download_gedi_rh100.yaml | 35 + .../glad/glad_forest_extent_download.yaml | 27 + .../glad_forest_extent_download_merge.yaml | 41 + .../gnatsgo/download_gnatsgo.yaml | 79 ++ .../hansen/hansen_forest_change_download.yaml | 57 + .../landsat/preprocess_landsat.yaml | 43 + .../download_modis_surface_reflectance.yaml | 41 + .../download_modis_vegetation_index.yaml | 42 + 
.../data_ingestion/naip/download_naip.yaml | 28 + .../data_ingestion/osm_road_geometries.yaml | 37 + .../sentinel1/preprocess_s1.yaml | 75 ++ .../sentinel2/cloud_ensemble.yaml | 60 + .../sentinel2/improve_cloud_mask.yaml | 63 + .../improve_cloud_mask_ensemble.yaml | 45 + .../sentinel2/preprocess_s2.yaml | 66 ++ .../preprocess_s2_ensemble_masks.yaml | 46 + .../preprocess_s2_improved_masks.yaml | 54 + workflows/data_ingestion/soil/soilgrids.yaml | 53 + workflows/data_ingestion/soil/usda.yaml | 29 + .../data_ingestion/spaceeye/spaceeye.yaml | 56 + .../spaceeye/spaceeye_inference.yaml | 81 ++ .../spaceeye/spaceeye_interpolation.yaml | 62 + .../spaceeye_interpolation_inference.yaml | 68 ++ .../spaceeye/spaceeye_preprocess.yaml | 49 + .../spaceeye_preprocess_ensemble.yaml | 39 + .../user_data/ingest_geometry.yaml | 26 + .../user_data/ingest_raster.yaml | 26 + .../data_ingestion/user_data/ingest_smb.yaml | 42 + .../weather/download_chirps.yaml | 32 + .../data_ingestion/weather/download_era5.yaml | 48 + .../weather/download_era5_monthly.yaml | 47 + .../weather/download_gridmet.yaml | 49 + .../weather/download_herbie.yaml | 70 ++ .../weather/download_terraclimate.yaml | 47 + .../weather/get_ambient_weather.yaml | 37 + .../data_ingestion/weather/get_forecast.yaml | 45 + .../weather/herbie_forecast.yaml | 74 ++ .../chunk_onnx/chunk_onnx.yaml | 55 + .../chunk_onnx/chunk_onnx_sequence.yaml | 51 + workflows/data_processing/clip/clip.yaml | 32 + .../gradient/raster_gradient.yaml | 18 + .../heatmap/classification.yaml | 108 ++ workflows/data_processing/index/index.yaml | 28 + .../linear_trend/chunked_linear_trend.yaml | 43 + .../merge/match_merge_to_ref.yaml | 57 + .../outlier/detect_outlier.yaml | 36 + .../threshold/threshold_raster.yaml | 23 + .../timeseries/timeseries_aggregation.yaml | 27 + .../timeseries_masked_aggregation.yaml | 40 + .../farm_ai/agriculture/canopy_cover.yaml | 50 + .../farm_ai/agriculture/change_detection.yaml | 57 + .../agriculture/emergence_summary.yaml | 
57 + .../agriculture/green_house_gas_fluxes.yaml | 28 + .../heatmap_using_classification.yaml | 49 + .../heatmap_using_classification_admag.yaml | 71 ++ ...heatmap_using_neighboring_data_points.yaml | 68 ++ .../farm_ai/agriculture/methane_index.yaml | 45 + .../farm_ai/agriculture/ndvi_summary.yaml | 47 + .../farm_ai/agriculture/weed_detection.yaml | 67 ++ .../admag_carbon_integration.yaml | 94 ++ .../farm_ai/carbon_local/carbon_whatif.yaml | 54 + .../conservation_practices.yaml | 92 ++ .../land_degradation/landsat_ndvi_trend.yaml | 34 + .../land_degradation/ndvi_linear_trend.yaml | 31 + .../segmentation/auto_segment_basemap.yaml | 77 ++ .../farm_ai/segmentation/auto_segment_s2.yaml | 75 ++ .../farm_ai/segmentation/segment_basemap.yaml | 57 + .../farm_ai/segmentation/segment_s2.yaml | 55 + .../farm_ai/sensor/optimal_locations.yaml | 58 + .../water/irrigation_classification.yaml | 140 +++ .../deforestation/alos_trend_detection.yaml | 61 + .../ordinal_trend_detection.yaml | 62 + workflows/helloworld.yaml | 18 + workflows/ml/crop_segmentation.yaml | 58 + .../datagen_crop_segmentation.yaml | 38 + workflows/ml/driveway_detection.yaml | 62 + .../automatic_segmentation.yaml | 82 ++ .../segment_anything/prompt_segmentation.yaml | 61 + workflows/ml/spectral_extension.yaml | 68 ++ 673 files changed, 51199 insertions(+), 71 deletions(-) create mode 100644 .devcontainer/devcontainer.json create mode 100755 .devcontainer/post-create.sh create mode 100644 .gitattributes create mode 100644 .ruff.toml create mode 100644 Makefile create mode 100644 op_resources/average_model/pixel_average_model.onnx create mode 100644 op_resources/cdl_metadata/CDL_codes_names_colors.xls create mode 100644 op_resources/cdl_metadata/us_continental.wkt create mode 100644 op_resources/cloud_models/NOTICE.md create mode 100644 op_resources/cloud_models/cloud_model1_cpu.onnx create mode 100644 op_resources/cloud_models/cloud_model2_cpu.onnx create mode 100644 
op_resources/cloud_models/cloud_model3_cpu.onnx create mode 100644 op_resources/cloud_models/cloud_model4_cpu.onnx create mode 100644 op_resources/cloud_models/cloud_model5_cpu.onnx create mode 100644 op_resources/conservation_practices_models/terraces_grassed_waterways.onnx create mode 100644 op_resources/driveways_models/driveway.onnx create mode 100644 op_resources/glad_tile_geometry/10d_tiles.geojson create mode 100644 op_resources/glad_tile_geometry/NOTICE.md create mode 100644 op_resources/sentinel_tile_geometry/NOTICE.md create mode 100644 op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml create mode 100644 op_resources/shadow_models/NOTICE.md create mode 100644 op_resources/shadow_models/shadow.onnx create mode 100644 op_resources/spaceeye_models/spaceeye.onnx create mode 100644 op_resources/spectral_extension_model/spectral_extension.onnx create mode 100644 ops/admag/admag_seasonal_field.yaml create mode 100644 ops/admag/admag_seasonal_field_op.py create mode 100644 ops/admag/get_prescription.py create mode 100644 ops/admag/get_prescription.yaml create mode 100644 ops/admag/list_prescriptions.py create mode 100644 ops/admag/list_prescriptions.yaml create mode 100644 ops/admag/prescriptions.py create mode 100644 ops/admag/prescriptions.yaml create mode 100644 ops/admag/test_admag.py create mode 100644 ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml create mode 100644 ops/aggregate_statistics_timeseries/aggregate_timeseries.py create mode 100644 ops/carbon_local/test_whatif.py create mode 100644 ops/carbon_local/whatif_comet_local.py create mode 100644 ops/carbon_local/whatif_comet_local_op.yaml create mode 100644 ops/chunk_raster/chunk_raster.py create mode 100644 ops/chunk_raster/chunk_raster.yaml create mode 100644 ops/chunk_raster/chunk_sequence_raster.yaml create mode 100644 ops/clip_raster/clip_raster.py create mode 100644 ops/clip_raster/clip_raster.yaml 
create mode 100644 ops/combine_chunks/combine_chunks.py create mode 100644 ops/combine_chunks/combine_chunks.yaml create mode 100644 ops/compute_cloud_prob/compute_cloud_prob.py create mode 100644 ops/compute_cloud_prob/compute_cloud_prob.yaml create mode 100644 ops/compute_cloud_water_mask/compute_cloud_water_mask.py create mode 100644 ops/compute_cloud_water_mask/compute_cloud_water_mask.yaml create mode 100644 ops/compute_conservation_practice/compute_conservation_practice.py create mode 100644 ops/compute_conservation_practice/compute_conservation_practice.yaml create mode 100644 ops/compute_evaporative_fraction/compute_evaporative_fraction.py create mode 100644 ops/compute_evaporative_fraction/compute_evaporative_fraction.yaml create mode 100644 ops/compute_fcover/compute_fcover.yaml create mode 100644 ops/compute_fcover/fcover.py create mode 100644 ops/compute_ghg_fluxes/compute_ghg_fluxes.py create mode 100644 ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml create mode 100644 ops/compute_ghg_fluxes/test_ghg_fluxes.py create mode 100644 ops/compute_illuminance/compute_illuminance.py create mode 100644 ops/compute_illuminance/compute_illuminance.yaml create mode 100644 ops/compute_index/compute_index.yaml create mode 100644 ops/compute_index/index.py create mode 100644 ops/compute_index/test_index.py create mode 100644 ops/compute_irrigation_probability/compute_irrigation_probability.py create mode 100644 ops/compute_irrigation_probability/compute_irrigation_probability.yaml create mode 100644 ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py create mode 100644 ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml create mode 100644 ops/compute_onnx/compute_onnx.py create mode 100644 ops/compute_onnx/compute_onnx.yaml create mode 100644 ops/compute_onnx/compute_onnx_from_chunks.yaml create mode 100644 ops/compute_onnx/compute_onnx_from_sequence.yaml create mode 100644 ops/compute_onnx/test_compute_onnx.py create mode 100644 
ops/compute_onnx/test_compute_onnx_chunk.py create mode 100644 ops/compute_pixel_count/compute_pixel_count.py create mode 100644 ops/compute_pixel_count/compute_pixel_count.yaml create mode 100644 ops/compute_pixel_count/test_compute_pixel_count.py create mode 100644 ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py create mode 100644 ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml create mode 100644 ops/compute_raster_cluster/compute_raster_cluster.py create mode 100644 ops/compute_raster_cluster/compute_raster_cluster.yaml create mode 100644 ops/compute_raster_gradient/compute_raster_gradient.py create mode 100644 ops/compute_raster_gradient/compute_raster_gradient.yaml create mode 100644 ops/compute_shadow_prob/compute_shadow_prob.py create mode 100644 ops/compute_shadow_prob/compute_shadow_prob.yaml create mode 100644 ops/create_raster_sequence/create_raster_sequence.py create mode 100644 ops/create_raster_sequence/create_raster_sequence.yaml create mode 100644 ops/create_raster_sequence/create_raster_sequence_from_sequence_list.yaml create mode 100644 ops/datavibe_filter/datavibe_filter.py create mode 100644 ops/datavibe_filter/datavibe_filter.yaml create mode 100644 ops/detect_driveway/detect_driveway.py create mode 100644 ops/detect_driveway/detect_driveway.yaml create mode 100644 ops/detect_outliers/detect_outliers.py create mode 100644 ops/detect_outliers/detect_outliers.yaml create mode 100644 ops/download_airbus/download_airbus.py create mode 100644 ops/download_airbus/download_airbus.yaml create mode 100644 ops/download_alos/download_alos.py create mode 100644 ops/download_alos/download_alos.yaml create mode 100644 ops/download_alos/test_download_alos.py create mode 100644 ops/download_ambient_weather/download_ambient_weather.py create mode 100644 ops/download_ambient_weather/download_ambient_weather.yaml create mode 100644 ops/download_bing_basemap/download_bing_basemap.py create 
mode 100644 ops/download_bing_basemap/download_bing_basemap.yaml create mode 100644 ops/download_bing_basemap/test_download_bing_basemap.py create mode 100644 ops/download_cdl_data/download_cdl.py create mode 100644 ops/download_cdl_data/download_cdl.yaml create mode 100644 ops/download_cdl_data/download_cdl_data.py create mode 100644 ops/download_cdl_data/download_cdl_data.yaml create mode 100644 ops/download_chirps/download_chirps.py create mode 100644 ops/download_chirps/download_chirps.yaml create mode 100644 ops/download_climatology_lab/download_climatology_lab.py create mode 100644 ops/download_climatology_lab/download_climatology_lab.yaml create mode 100644 ops/download_climatology_lab/test_download_climatology_lab.py create mode 100644 ops/download_dem/download_dem.py create mode 100644 ops/download_dem/download_dem.yaml create mode 100644 ops/download_dem/test_download_dem.py create mode 100644 ops/download_era5/download_era5.py create mode 100644 ops/download_era5/download_era5.yaml create mode 100644 ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py create mode 100644 ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml create mode 100644 ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py create mode 100644 ops/download_from_ref/download_from_ref.py create mode 100644 ops/download_from_ref/download_geometry_from_ref.yaml create mode 100644 ops/download_from_ref/download_raster_from_ref.yaml create mode 100644 ops/download_from_smb/download_rasters_from_smb.py create mode 100644 ops/download_from_smb/download_rasters_from_smb.yaml create mode 100644 ops/download_gedi_product/download_gedi_product.py create mode 100644 ops/download_gedi_product/download_gedi_product.yaml create mode 100644 ops/download_gedi_product/test_download_gedi_product.py create mode 100644 ops/download_glad_data/download_glad.py create mode 100644 ops/download_glad_data/download_glad.yaml create mode 100644 
ops/download_glad_data/test_download_glad_product.py create mode 100644 ops/download_gnatsgo/download_gnatsgo.yaml create mode 100644 ops/download_gnatsgo/download_gnatsgo_raster.py create mode 100644 ops/download_gnatsgo/test_download_gnatsgo.py create mode 100644 ops/download_hansen/download_hansen.py create mode 100644 ops/download_hansen/download_hansen.yaml create mode 100644 ops/download_hansen/test_download_hansen_product.py create mode 100644 ops/download_herbie/download_herbie.py create mode 100644 ops/download_herbie/download_herbie.yaml create mode 100644 ops/download_herbie/forecast_range_split.py create mode 100644 ops/download_herbie/forecast_range_split.yaml create mode 100644 ops/download_herbie/forecast_weather.py create mode 100644 ops/download_herbie/forecast_weather.yaml create mode 100644 ops/download_landsat_from_pc/download_landsat_from_pc.yaml create mode 100644 ops/download_landsat_from_pc/download_landsat_pc.py create mode 100644 ops/download_modis_sr/download_modis_sr.py create mode 100644 ops/download_modis_sr/download_modis_sr.yaml create mode 100644 ops/download_modis_vegetation/download_modis_vegetation.py create mode 100644 ops/download_modis_vegetation/download_modis_vegetation.yaml create mode 100644 ops/download_modis_vegetation/test_download_modis_vegetation.py create mode 100644 ops/download_naip/download_naip.py create mode 100644 ops/download_naip/download_naip.yaml create mode 100644 ops/download_naip/test_download_naip.py create mode 100644 ops/download_road_geometries/download_road_geometries.py create mode 100644 ops/download_road_geometries/download_road_geometries.yaml create mode 100644 ops/download_sentinel1/download_sentinel1.yaml create mode 100644 ops/download_sentinel1/download_sentinel1_rtc.py create mode 100644 ops/download_sentinel1/test_download_sentinel1_rtc.py create mode 100644 ops/download_sentinel1_grd/download_sentinel1_grd.py create mode 100644 ops/download_sentinel1_grd/download_sentinel1_grd.yaml 
create mode 100644 ops/download_sentinel1_grd/test_download_sentinel1.py create mode 100644 ops/download_sentinel2_from_pc/download_s2_pc.py create mode 100644 ops/download_sentinel2_from_pc/download_sentinel2_from_pc.yaml create mode 100644 ops/download_soilgrids/download_soilgrids.py create mode 100644 ops/download_soilgrids/download_soilgrids.yaml create mode 100644 ops/download_stack_sentinel2/download_stack_s2.py create mode 100644 ops/download_stack_sentinel2/download_stack_sentinel2.yaml create mode 100644 ops/download_usda_soils/download_usda_soils.py create mode 100644 ops/download_usda_soils/download_usda_soils.yaml create mode 100644 ops/ensemble_cloud_prob/ensemble_cloud_prob.py create mode 100644 ops/ensemble_cloud_prob/ensemble_cloud_prob.yaml create mode 100644 ops/estimate_canopy_cover/estimate_canopy.py create mode 100644 ops/estimate_canopy_cover/estimate_canopy_cover.yaml create mode 100644 ops/extract_gedi_rh100/extract_gedi_rh100.py create mode 100644 ops/extract_gedi_rh100/extract_gedi_rh100.yaml create mode 100644 ops/extract_gedi_rh100/test_extract_gedi_rh100.py create mode 100644 ops/extract_protein_sequence/extract_protein_sequence.py create mode 100644 ops/extract_protein_sequence/extract_protein_sequence.yaml create mode 100644 ops/get_angles/get_angles.py create mode 100644 ops/get_angles/get_angles.yaml create mode 100644 ops/gfs_download/gfs_download.py create mode 100644 ops/gfs_download/gfs_download.yaml create mode 100644 ops/gfs_preprocess/gfs_preprocess.py create mode 100644 ops/gfs_preprocess/gfs_preprocess.yaml create mode 100644 ops/group_rasters_by_geometries/group_rasters_by_geometries.py create mode 100644 ops/group_rasters_by_geometries/group_rasters_by_geometries.yaml create mode 100644 ops/group_rasters_by_time/group_rasters_by_time.py create mode 100644 ops/group_rasters_by_time/group_rasters_by_time.yaml create mode 100644 ops/group_rasters_by_time/test_group_rasters_by_time.py create mode 100644 
ops/group_sentinel1_orbits/group_sentinel1_orbits.py create mode 100644 ops/group_sentinel1_orbits/group_sentinel1_orbits.yaml create mode 100644 ops/group_sentinel2_orbits/group_sentinel2_orbits.py create mode 100644 ops/group_sentinel2_orbits/group_sentinel2_orbits.yaml create mode 100644 ops/group_tile_sequence/group_s1_tile_sequence.yaml create mode 100644 ops/group_tile_sequence/group_s2_tile_sequence.yaml create mode 100644 ops/group_tile_sequence/group_s2cloudmask_tile_sequence.yaml create mode 100644 ops/group_tile_sequence/group_tile_sequence.py create mode 100644 ops/heatmap_sensor/soil_sample_heatmap_using_classification.py create mode 100644 ops/heatmap_sensor/soil_sample_heatmap_using_classification.yaml create mode 100644 ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py create mode 100644 ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.yaml create mode 100644 ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py create mode 100644 ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py create mode 100644 ops/helloworld/helloworld.py create mode 100644 ops/helloworld/helloworld.yaml create mode 100644 ops/linear_trend/linear_trend.py create mode 100644 ops/linear_trend/linear_trend.yaml create mode 100644 ops/linear_trend/test_linear_trend.py create mode 100644 ops/list_airbus_products/list_airbus.py create mode 100644 ops/list_airbus_products/list_airbus_products.yaml create mode 100644 ops/list_alos_products/list_alos_products.py create mode 100644 ops/list_alos_products/list_alos_products.yaml create mode 100644 ops/list_alos_products/test_alos_list.py create mode 100644 ops/list_bing_maps/list_bing_maps.py create mode 100644 ops/list_bing_maps/list_bing_maps.yaml create mode 100644 ops/list_bing_maps/test_list_bing_maps.py create mode 100644 ops/list_cdl_products/list_cdl_products.py create mode 100644 ops/list_cdl_products/list_cdl_products.yaml create mode 100644 ops/list_chirps/list_chirps.py create 
mode 100644 ops/list_chirps/list_chirps.yaml create mode 100644 ops/list_climatology_lab/list_climatology_lab.py create mode 100644 ops/list_climatology_lab/list_gridmet.yaml create mode 100644 ops/list_climatology_lab/list_terraclimate.yaml create mode 100644 ops/list_climatology_lab/test_list_climatology_lab.py create mode 100644 ops/list_dem_products/list_dem_products.py create mode 100644 ops/list_dem_products/list_dem_products.yaml create mode 100644 ops/list_dem_products/test_list_dem_products.py create mode 100644 ops/list_era5/list_era5.py create mode 100644 ops/list_era5/list_era5.yaml create mode 100644 ops/list_era5/list_era5_cds.py create mode 100644 ops/list_era5/list_era5_cds.yaml create mode 100644 ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py create mode 100644 ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml create mode 100644 ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py create mode 100644 ops/list_gedi_products/list_gedi_products.py create mode 100644 ops/list_gedi_products/list_gedi_products.yaml create mode 100644 ops/list_gedi_products/mock_items.json create mode 100644 ops/list_gedi_products/test_list_gedi_products.py create mode 100644 ops/list_glad_products/list_glad_products.py create mode 100644 ops/list_glad_products/list_glad_products.yaml create mode 100644 ops/list_glad_products/test_glad_list.py create mode 100644 ops/list_gnatsgo_products/list_gnatsgo_products.py create mode 100644 ops/list_gnatsgo_products/list_gnatsgo_products.yaml create mode 100644 ops/list_gnatsgo_products/test_list_gnatsgo_products.py create mode 100644 ops/list_hansen_products/list_hansen_products.py create mode 100644 ops/list_hansen_products/list_hansen_products.yaml create mode 100644 ops/list_hansen_products/test_hansen_list.py create mode 100644 ops/list_herbie/list_herbie.py create mode 100644 ops/list_herbie/list_herbie.yaml create mode 100644 ops/list_landsat_products_pc/list_landsat_pc.py create 
mode 100644 ops/list_landsat_products_pc/list_landsat_products_pc.yaml create mode 100644 ops/list_modis_sr/list_modis_sr.py create mode 100644 ops/list_modis_sr/list_modis_sr.yaml create mode 100644 ops/list_modis_vegetation/list_modis_vegetation.py create mode 100644 ops/list_modis_vegetation/list_modis_vegetation.yaml create mode 100644 ops/list_modis_vegetation/test_list_modis_vegetation.py create mode 100644 ops/list_naip_products/list_naip_products.py create mode 100644 ops/list_naip_products/list_naip_products.yaml create mode 100644 ops/list_naip_products/test_list_naip_products.py create mode 100644 ops/list_sentinel1_products/list_sentinel1_products_pc.py create mode 100644 ops/list_sentinel1_products/list_sentinel1_products_pc.yaml create mode 100644 ops/list_sentinel1_products/sample_pc_output.json create mode 100644 ops/list_sentinel1_products/test_list_sentinel1.py create mode 100644 ops/list_sentinel2_products/list_s2_pc.py create mode 100644 ops/list_sentinel2_products/list_sentinel2_products_pc.yaml create mode 100644 ops/list_to_sequence/list_to_sequence.py create mode 100644 ops/list_to_sequence/list_to_sequence.yaml create mode 100644 ops/list_to_sequence/test_list_to_sequence.py create mode 100644 ops/match_raster_to_ref/match_raster_to_ref.py create mode 100644 ops/match_raster_to_ref/match_raster_to_ref.yaml create mode 100644 ops/merge_cloud_masks/merge_cloud_masks.py create mode 100644 ops/merge_cloud_masks/merge_cloud_masks.yaml create mode 100644 ops/merge_cloud_masks/merge_cloud_masks_simple.py create mode 100644 ops/merge_cloud_masks/merge_cloud_masks_simple.yaml create mode 100644 ops/merge_geometries/merge_geometries.py create mode 100644 ops/merge_geometries/merge_geometries.yaml create mode 100644 ops/merge_geometries/test_merge_geometries.py create mode 100644 ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py create mode 100644 ops/merge_geometry_and_time_range/merge_geometry_and_time_range.yaml create mode 100644 
ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py create mode 100644 ops/merge_rasters/merge_rasters.py create mode 100644 ops/merge_rasters/merge_rasters.yaml create mode 100644 ops/merge_sentinel1_orbits/merge_sentinel1.py create mode 100644 ops/merge_sentinel1_orbits/merge_sentinel1_orbits.yaml create mode 100644 ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py create mode 100644 ops/merge_sentinel2_orbits/merge_sentinel2_orbits.yaml create mode 100644 ops/minimum_samples/find_soil_sample_locations.py create mode 100644 ops/minimum_samples/find_soil_sample_locations.yaml create mode 100644 ops/minimum_samples/test_soil_sample_heatmap.py create mode 100644 ops/ordinal_trend_test/ordinal_trend_test.py create mode 100644 ops/ordinal_trend_test/ordinal_trend_test.yaml create mode 100644 ops/ordinal_trend_test/test_ordinal_trend.py create mode 100644 ops/pair_intersecting_rasters/pair_intersecting_rasters.py create mode 100644 ops/pair_intersecting_rasters/pair_intersecting_rasters.yaml create mode 100644 ops/price_airbus_products/price_airbus.py create mode 100644 ops/price_airbus_products/price_airbus_products.yaml create mode 100644 ops/protlearn/protlearn.py create mode 100644 ops/protlearn/protlearn.yaml create mode 100644 ops/read_grib_forecast/read_grib_forecast.py create mode 100644 ops/read_grib_forecast/read_grib_forecast.yaml create mode 100644 ops/recode_raster/recode_raster.py create mode 100644 ops/recode_raster/recode_raster.yaml create mode 100644 ops/recode_raster/test_recode_raster.py create mode 100644 ops/remove_clouds/remove_clouds.py create mode 100644 ops/remove_clouds/remove_clouds.yaml create mode 100644 ops/remove_clouds/remove_clouds_interpolation.yaml create mode 100644 ops/remove_clouds/test_remove_clouds.py create mode 100644 ops/segment_anything/automatic_segmentation.yaml create mode 100644 ops/segment_anything/prompt_segmentation.yaml create mode 100644 ops/segment_anything/sam_inference.py create mode 100644 
ops/segment_anything/test_sam_inference.py create mode 100644 ops/segment_anything_combine_masks/combine_sam_masks.py create mode 100644 ops/segment_anything_combine_masks/combine_sam_masks.yaml create mode 100644 ops/segment_anything_combine_masks/test_combine_sam_masks.py create mode 100644 ops/segment_driveway/segment_driveway.py create mode 100644 ops/segment_driveway/segment_driveway.yaml create mode 100644 ops/select_necessary_coverage_items/filter_items.py create mode 100644 ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml create mode 100644 ops/select_necessary_coverage_items/test_filter.py create mode 100644 ops/select_sequence/select_sequence.py create mode 100644 ops/select_sequence/select_sequence.yaml create mode 100644 ops/select_sequence/select_sequence_from_list.yaml create mode 100644 ops/split_sequence/split_sequence.py create mode 100644 ops/split_sequence/split_spaceeye_sequence.yaml create mode 100644 ops/split_sequence/test_split_sequence.py create mode 100644 ops/stack_landsat/stack_landsat.py create mode 100644 ops/stack_landsat/stack_landsat.yaml create mode 100644 ops/stack_sentinel2_bands/stack_sentinel2_bands.py create mode 100644 ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml create mode 100644 ops/summarize_raster/raster_summary.py create mode 100644 ops/summarize_raster/summarize_masked_raster.yaml create mode 100644 ops/summarize_raster/summarize_raster.yaml create mode 100644 ops/threshold_raster/threshold_raster.py create mode 100644 ops/threshold_raster/threshold_raster.yaml create mode 100644 ops/tile_sentinel1/tile_sentinel1.py create mode 100644 ops/tile_sentinel1/tile_sentinel1.yaml create mode 100644 ops/tile_sentinel1/tile_sentinel1_rtc.yaml create mode 100644 ops/unpack_refs/unpack_refs.py create mode 100644 ops/unpack_refs/unpack_refs.yaml create mode 100644 ops/weed_detection/weed_detection.py create mode 100644 ops/weed_detection/weed_detection.yaml create mode 100644 pyrightconfig.json 
create mode 100644 pytest.ini create mode 100644 resources/docker/Dockerfile-api_orchestrator create mode 100644 resources/docker/Dockerfile-cache create mode 100644 resources/docker/Dockerfile-dev create mode 100644 resources/docker/Dockerfile-devcontainer create mode 100644 resources/docker/Dockerfile-services-base create mode 100644 resources/docker/Dockerfile-worker create mode 100644 resources/docker/Dockerfile-worker-base create mode 100755 resources/docker/docker-in-docker-install.sh create mode 100644 resources/documentation_generation/generate_datatype_hierarchy_diagram.py create mode 100644 resources/documentation_generation/generate_notebook_list.py create mode 100644 resources/documentation_generation/generate_workflow_list.py create mode 100644 resources/documentation_generation/templates/datatype_hierarchy_template.md create mode 100644 resources/documentation_generation/templates/list_notebook_template.md create mode 100644 resources/documentation_generation/templates/list_workflow_template.md create mode 100644 resources/documentation_generation/templates/workflow_yaml_template.md create mode 100644 resources/envs/dev.yaml create mode 100644 resources/envs/rest-api_orchestrator.yml create mode 100644 resources/envs/services-requirements.txt create mode 100644 resources/envs/worker-requirements.txt create mode 100644 resources/envs/worker.yml create mode 100644 scripts/setup_python_develop_env.sh create mode 100644 src/tests/__init__.py create mode 100644 src/tests/benchmark/test_spaceeye_ops.py create mode 100644 src/tests/conftest.py create mode 100644 src/tests/test_notebooks.py create mode 100644 src/tests/test_op_workflows_integration.py create mode 100644 src/tests/test_ops_building.py create mode 100644 src/tests/test_rest_api.py create mode 100644 src/tests/test_rest_api_client_integration.py create mode 100644 src/tests/test_subprocess_client.py create mode 100644 src/tests/workflows_integration/__init__.py create mode 100644 
src/tests/workflows_integration/test_helloworld_integration.py create mode 100644 src/tests_local_cluster/expected.tif create mode 100644 src/tests_local_cluster/test_cluster_integration.py create mode 100644 src/vibe_agent/setup.py create mode 100644 src/vibe_agent/tests/conftest.py create mode 100644 src/vibe_agent/tests/ops/test_dependencies_integration.py create mode 100644 src/vibe_agent/tests/ops/test_op_cache_builder.py create mode 100644 src/vibe_agent/tests/ops/test_op_parser.py create mode 100644 src/vibe_agent/tests/ops/test_operation.py create mode 100644 src/vibe_agent/tests/test_cache_metadata_store.py create mode 100644 src/vibe_agent/tests/test_eywa_asset.py create mode 100644 src/vibe_agent/tests/test_local_asset_manager.py create mode 100644 src/vibe_agent/tests/test_storage.py create mode 100644 src/vibe_agent/tests/test_uri_handling.py create mode 100644 src/vibe_agent/vibe_agent/__init__.py create mode 100644 src/vibe_agent/vibe_agent/agent_config.py create mode 100644 src/vibe_agent/vibe_agent/cache.py create mode 100644 src/vibe_agent/vibe_agent/cache_metadata_store.py create mode 100644 src/vibe_agent/vibe_agent/cache_metadata_store_client.py create mode 100644 src/vibe_agent/vibe_agent/data_ops.py create mode 100644 src/vibe_agent/vibe_agent/launch_cache.py create mode 100644 src/vibe_agent/vibe_agent/launch_data_ops.py create mode 100644 src/vibe_agent/vibe_agent/launch_worker.py create mode 100644 src/vibe_agent/vibe_agent/ops.py create mode 100644 src/vibe_agent/vibe_agent/ops_helper.py create mode 100644 src/vibe_agent/vibe_agent/storage/__init__.py create mode 100644 src/vibe_agent/vibe_agent/storage/asset_management.py create mode 100644 src/vibe_agent/vibe_agent/storage/file_upload.py create mode 100644 src/vibe_agent/vibe_agent/storage/local_storage.py create mode 100644 src/vibe_agent/vibe_agent/storage/remote_storage.py create mode 100644 src/vibe_agent/vibe_agent/storage/storage.py create mode 100644 
src/vibe_agent/vibe_agent/worker.py create mode 100644 src/vibe_common/setup.py create mode 100644 src/vibe_common/tests/conftest.py create mode 100644 src/vibe_common/tests/test_input_handlers.py create mode 100644 src/vibe_common/tests/test_messaging.py create mode 100644 src/vibe_common/tests/test_statestore.py create mode 100644 src/vibe_common/tests/test_vibe_dapr_client.py create mode 100644 src/vibe_common/vibe_common/__init__.py create mode 100644 src/vibe_common/vibe_common/constants.py create mode 100644 src/vibe_common/vibe_common/dapr.py create mode 100644 src/vibe_common/vibe_common/dropdapr.py create mode 100644 src/vibe_common/vibe_common/input_handlers.py create mode 100644 src/vibe_common/vibe_common/messaging.py create mode 100644 src/vibe_common/vibe_common/schemas.py create mode 100644 src/vibe_common/vibe_common/secret_provider.py create mode 100644 src/vibe_common/vibe_common/statestore.py create mode 100644 src/vibe_common/vibe_common/telemetry.py create mode 100644 src/vibe_common/vibe_common/tokens.py create mode 100644 src/vibe_common/vibe_common/vibe_dapr_client.py create mode 100644 src/vibe_common/vibe_common/workflow/__init__.py create mode 100644 src/vibe_core/tests/test_stac_converter.py create mode 100644 src/vibe_core/tests/test_type_serialization.py create mode 100644 src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf create mode 100644 src/vibe_core/vibe_core/terraform/local/modules/kubernetes/jaeger.tf create mode 100644 src/vibe_core/vibe_core/terraform/local/modules/kubernetes/otel.tf create mode 100644 src/vibe_dev/setup.py create mode 100644 src/vibe_dev/vibe_dev/__init__.py create mode 100644 src/vibe_dev/vibe_dev/client/__init__.py create mode 100644 src/vibe_dev/vibe_dev/client/remote_client.py create mode 100644 src/vibe_dev/vibe_dev/client/subprocess_client.py create mode 100644 src/vibe_dev/vibe_dev/local_runner.py create mode 100644 src/vibe_dev/vibe_dev/mock_utils.py create mode 100644 
src/vibe_dev/vibe_dev/testing/__init__.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml create mode 100644 
src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml create mode 100644 
src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml create mode 100644 src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py create mode 100644 src/vibe_dev/vibe_dev/testing/op_tester.py create mode 100644 src/vibe_dev/vibe_dev/testing/storage_fixtures.py create mode 100644 src/vibe_dev/vibe_dev/testing/utils.py create mode 100644 src/vibe_dev/vibe_dev/testing/workflow_fixtures.py create mode 100644 src/vibe_lib/setup.py create mode 100644 src/vibe_lib/tests/test_airbus_api.py create mode 100644 src/vibe_lib/tests/test_earthdata.py create mode 100644 src/vibe_lib/tests/test_predict_chips.py create mode 100644 src/vibe_lib/tests/test_raster_chipping.py create mode 100644 src/vibe_lib/vibe_lib/__init__.py create mode 100644 src/vibe_lib/vibe_lib/airbus.py create mode 100644 src/vibe_lib/vibe_lib/archive.py create mode 100644 src/vibe_lib/vibe_lib/bing_maps.py create mode 100644 src/vibe_lib/vibe_lib/climatology_lab.py create mode 100644 src/vibe_lib/vibe_lib/comet_farm/__init__.py create mode 100644 src/vibe_lib/vibe_lib/comet_farm/comet_model.py create mode 100644 src/vibe_lib/vibe_lib/comet_farm/comet_requester.py create mode 100644 src/vibe_lib/vibe_lib/comet_farm/comet_server.py create mode 100644 src/vibe_lib/vibe_lib/deepmc/encoder.py create mode 100644 
src/vibe_lib/vibe_lib/deepmc/helpers.py create mode 100644 src/vibe_lib/vibe_lib/deepmc/locally_connected.py create mode 100644 src/vibe_lib/vibe_lib/deepmc/models.py create mode 100644 src/vibe_lib/vibe_lib/deepmc/time.py create mode 100644 src/vibe_lib/vibe_lib/deepmc/transform.py create mode 100644 src/vibe_lib/vibe_lib/earthdata.py create mode 100644 src/vibe_lib/vibe_lib/gaussian_mixture.py create mode 100644 src/vibe_lib/vibe_lib/geometry.py create mode 100644 src/vibe_lib/vibe_lib/gfs_blob_utils.py create mode 100644 src/vibe_lib/vibe_lib/glad.py create mode 100644 src/vibe_lib/vibe_lib/heatmap_neighbor.py create mode 100644 src/vibe_lib/vibe_lib/overlap_clustering.py create mode 100644 src/vibe_lib/vibe_lib/planetary_computer.py create mode 100644 src/vibe_lib/vibe_lib/raster.py create mode 100644 src/vibe_lib/vibe_lib/segment_anything.py create mode 100644 src/vibe_lib/vibe_lib/shapefile.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/__init__.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/chip.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/dataset.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/illumination.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/interpolation.py create mode 100644 src/vibe_lib/vibe_lib/spaceeye/utils.py create mode 100644 src/vibe_lib/vibe_lib/stats.py create mode 100644 src/vibe_lib/vibe_lib/timeseries.py create mode 100644 src/vibe_server/setup.py create mode 100644 src/vibe_server/tests/conftest.py create mode 100644 src/vibe_server/tests/test_graph.py create mode 100644 src/vibe_server/tests/test_href_handler.py create mode 100644 src/vibe_server/tests/test_op_parallelism.py create mode 100644 src/vibe_server/tests/test_orchestrator.py create mode 100644 src/vibe_server/tests/test_parameter_resolver.py create mode 100644 src/vibe_server/tests/test_remote_workflow_runner.py create mode 100644 src/vibe_server/tests/test_workflow.py create mode 100644 
src/vibe_server/tests/test_workflow_input_handler.py create mode 100644 src/vibe_server/tests/test_workflow_parser.py create mode 100644 src/vibe_server/tests/test_workflow_runner.py create mode 100644 src/vibe_server/tests/test_workflow_spec_validator.py create mode 100644 src/vibe_server/tests/test_workflow_state.py create mode 100644 src/vibe_server/vibe_server/__init__.py create mode 100644 src/vibe_server/vibe_server/href_handler.py create mode 100755 src/vibe_server/vibe_server/orchestrator.py create mode 100644 src/vibe_server/vibe_server/server.py create mode 100644 src/vibe_server/vibe_server/sniffer.py create mode 100644 src/vibe_server/vibe_server/workflow/__init__.py create mode 100644 src/vibe_server/vibe_server/workflow/description_validator.py create mode 100644 src/vibe_server/vibe_server/workflow/graph.py create mode 100644 src/vibe_server/vibe_server/workflow/input_handler.py create mode 100644 src/vibe_server/vibe_server/workflow/parameter.py create mode 100644 src/vibe_server/vibe_server/workflow/runner/__init__.py create mode 100644 src/vibe_server/vibe_server/workflow/runner/remote_runner.py create mode 100644 src/vibe_server/vibe_server/workflow/runner/runner.py create mode 100644 src/vibe_server/vibe_server/workflow/runner/task_io_handler.py create mode 100644 src/vibe_server/vibe_server/workflow/spec_parser.py create mode 100644 src/vibe_server/vibe_server/workflow/spec_validator.py create mode 100644 src/vibe_server/vibe_server/workflow/workflow.py create mode 100644 workflows/data_ingestion/admag/admag_seasonal_field.yaml create mode 100644 workflows/data_ingestion/admag/prescriptions.yaml create mode 100644 workflows/data_ingestion/airbus/airbus_download.yaml create mode 100644 workflows/data_ingestion/airbus/airbus_price.yaml create mode 100644 workflows/data_ingestion/alos/alos_forest_extent_download.yaml create mode 100644 workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml create mode 100644 
workflows/data_ingestion/bing/basemap_download.yaml create mode 100644 workflows/data_ingestion/bing/basemap_download_merge.yaml create mode 100644 workflows/data_ingestion/cdl/download_cdl.yaml create mode 100644 workflows/data_ingestion/dem/download_dem.yaml create mode 100644 workflows/data_ingestion/gedi/download_gedi.yaml create mode 100644 workflows/data_ingestion/gedi/download_gedi_rh100.yaml create mode 100644 workflows/data_ingestion/glad/glad_forest_extent_download.yaml create mode 100644 workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml create mode 100644 workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml create mode 100644 workflows/data_ingestion/hansen/hansen_forest_change_download.yaml create mode 100644 workflows/data_ingestion/landsat/preprocess_landsat.yaml create mode 100644 workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml create mode 100644 workflows/data_ingestion/modis/download_modis_vegetation_index.yaml create mode 100644 workflows/data_ingestion/naip/download_naip.yaml create mode 100644 workflows/data_ingestion/osm_road_geometries.yaml create mode 100644 workflows/data_ingestion/sentinel1/preprocess_s1.yaml create mode 100644 workflows/data_ingestion/sentinel2/cloud_ensemble.yaml create mode 100644 workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml create mode 100644 workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml create mode 100644 workflows/data_ingestion/sentinel2/preprocess_s2.yaml create mode 100644 workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml create mode 100644 workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml create mode 100644 workflows/data_ingestion/soil/soilgrids.yaml create mode 100644 workflows/data_ingestion/soil/usda.yaml create mode 100644 workflows/data_ingestion/spaceeye/spaceeye.yaml create mode 100644 workflows/data_ingestion/spaceeye/spaceeye_inference.yaml create mode 100644 
workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml create mode 100644 workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml create mode 100644 workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml create mode 100644 workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml create mode 100644 workflows/data_ingestion/user_data/ingest_geometry.yaml create mode 100644 workflows/data_ingestion/user_data/ingest_raster.yaml create mode 100644 workflows/data_ingestion/user_data/ingest_smb.yaml create mode 100755 workflows/data_ingestion/weather/download_chirps.yaml create mode 100644 workflows/data_ingestion/weather/download_era5.yaml create mode 100644 workflows/data_ingestion/weather/download_era5_monthly.yaml create mode 100644 workflows/data_ingestion/weather/download_gridmet.yaml create mode 100755 workflows/data_ingestion/weather/download_herbie.yaml create mode 100644 workflows/data_ingestion/weather/download_terraclimate.yaml create mode 100644 workflows/data_ingestion/weather/get_ambient_weather.yaml create mode 100644 workflows/data_ingestion/weather/get_forecast.yaml create mode 100644 workflows/data_ingestion/weather/herbie_forecast.yaml create mode 100644 workflows/data_processing/chunk_onnx/chunk_onnx.yaml create mode 100644 workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml create mode 100644 workflows/data_processing/clip/clip.yaml create mode 100644 workflows/data_processing/gradient/raster_gradient.yaml create mode 100644 workflows/data_processing/heatmap/classification.yaml create mode 100644 workflows/data_processing/index/index.yaml create mode 100644 workflows/data_processing/linear_trend/chunked_linear_trend.yaml create mode 100644 workflows/data_processing/merge/match_merge_to_ref.yaml create mode 100644 workflows/data_processing/outlier/detect_outlier.yaml create mode 100644 workflows/data_processing/threshold/threshold_raster.yaml create mode 100644 
workflows/data_processing/timeseries/timeseries_aggregation.yaml create mode 100644 workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml create mode 100644 workflows/farm_ai/agriculture/canopy_cover.yaml create mode 100644 workflows/farm_ai/agriculture/change_detection.yaml create mode 100644 workflows/farm_ai/agriculture/emergence_summary.yaml create mode 100644 workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml create mode 100644 workflows/farm_ai/agriculture/heatmap_using_classification.yaml create mode 100644 workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml create mode 100644 workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml create mode 100644 workflows/farm_ai/agriculture/methane_index.yaml create mode 100644 workflows/farm_ai/agriculture/ndvi_summary.yaml create mode 100644 workflows/farm_ai/agriculture/weed_detection.yaml create mode 100644 workflows/farm_ai/carbon_local/admag_carbon_integration.yaml create mode 100644 workflows/farm_ai/carbon_local/carbon_whatif.yaml create mode 100644 workflows/farm_ai/land_cover_mapping/conservation_practices.yaml create mode 100644 workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml create mode 100755 workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml create mode 100644 workflows/farm_ai/segmentation/auto_segment_basemap.yaml create mode 100644 workflows/farm_ai/segmentation/auto_segment_s2.yaml create mode 100644 workflows/farm_ai/segmentation/segment_basemap.yaml create mode 100644 workflows/farm_ai/segmentation/segment_s2.yaml create mode 100644 workflows/farm_ai/sensor/optimal_locations.yaml create mode 100644 workflows/farm_ai/water/irrigation_classification.yaml create mode 100644 workflows/forest_ai/deforestation/alos_trend_detection.yaml create mode 100644 workflows/forest_ai/deforestation/ordinal_trend_detection.yaml create mode 100644 workflows/helloworld.yaml create mode 100644 workflows/ml/crop_segmentation.yaml create mode 
100644 workflows/ml/dataset_generation/datagen_crop_segmentation.yaml create mode 100644 workflows/ml/driveway_detection.yaml create mode 100644 workflows/ml/segment_anything/automatic_segmentation.yaml create mode 100644 workflows/ml/segment_anything/prompt_segmentation.yaml create mode 100644 workflows/ml/spectral_extension.yaml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..e37e2bb0 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,62 @@ +{ + "build": { + "dockerfile": "../resources/docker/Dockerfile-devcontainer" + }, + "overrideCommand": false, + "mounts": [ + "type=bind,source=${localEnv:HOME}${localEnv:USERPROFILE},target=/user-home", + "source=terravibes-devcontainer-profile,target=/home/vscode,type=volume", + "target=/home/vscode/.vscode-server,type=volume", + "source=terravibes-devcontainer-var-lib-docker,target=/var/lib/docker,type=volume" + ], + "customizations": { + "vscode": { + "extensions": [ + "charliermarsh.ruff", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter", + "ms-azuretools.vscode-docker", + "DavidAnson.vscode-markdownlint", + "ms-vscode-remote.remote-containers", + "eamodio.gitlens", + "mutantdino.resourcemonitor" + ], + "settings": { + "python.defaultInterpreterPath": "/opt/venv/bin/python", + "python.testing.pytestArgs": [ + "src", + "ops" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll.ruff": "explicit", + "source.organizeImports.ruff": "explicit" + }, + "python.analysis.typeCheckingMode": "basic", + "python.analysis.diagnosticMode": "workspace", + "python.terminal.activateEnvironment": false, + "python.linting.flake8Enabled": true, + "git.autofetch": "all", + "terminal.integrated.allowChords": false, + "[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff" + } + } + } + }, 
+ "remoteUser": "vscode", + "updateRemoteUserUID": true, + "postCreateCommand": "sed 's/\r$//' .devcontainer/post-create.sh | bash -", + "runArgs": [ + "--init", + "--privileged" + ], + "containerEnv": { + "USER": "vscode", + "PATH": "/home/vscode/.local/bin:/opt/venv/bin:/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + } +} \ No newline at end of file diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh new file mode 100755 index 00000000..46297a55 --- /dev/null +++ b/.devcontainer/post-create.sh @@ -0,0 +1,68 @@ +#!/bin/sh + +DOCKER_VERSION=24.0.2 +VSCODE_HOME=/home/vscode + +if [ -d /user-home/.ssh ]; then + echo "Making user ssh available in container..." + mkdir -p $VSCODE_HOME/.ssh + chmod 0700 $VSCODE_HOME/.ssh + for f in /user-home/.ssh/* + do + cp "$f" $VSCODE_HOME/.ssh/"$(basename "$f")" + chmod 0600 $VSCODE_HOME/.ssh/"$(basename "$f")" + done +fi + +# If the user has a git config file, copy it +if [ -f /user-home/.gitconfig ]; then + echo "Copying user .gitconfig..." + cp /user-home/.gitconfig $VSCODE_HOME/.gitconfig + echo "Enabling HTTP use path, in case the user cloned with HTTP" + git config --global credential.useHttpPath true +fi + +if [ "$(stat -c '%u' .)" != "$UID" ]; then + echo "The permissions of the current directory differ from the current user," + echo "which means we're probably running in Docker under a Windows host..." + echo "Adding the current directory to the git safe directory list" + git config --global --add safe.directory /workspaces/TerraVibes +fi + +sudo mkdir /opt/venv +sudo chown vscode /opt/venv +/opt/conda/bin/python3 -m venv --system-site-packages /opt/venv || exit 1 +/opt/venv/bin/pip install --upgrade pip + +if [[ "$(uname -a)" == *"WSL2"* ]]; then + # We're either in WSL2 or in a Windows host + echo "If we're on a Windows host, we need to convert files to unix mode..." 
+ find cli scripts -type f -exec dos2unix --allow-chown {} \; +fi + +sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended +zsh -c "zstyle ':omz:update' mode auto" +zsh -c "zstyle ':omz:update' verbose minimal" +git clone https://github.com/zsh-users/zsh-autosuggestions.git ~/.oh-my-zsh/plugins/zsh-autosuggestions +git clone https://github.com/zsh-users/zsh-syntax-highlighting.git $ZSH_CUSTOM/plugins/zsh-syntax-highlighting +git clone --depth 1 -- https://github.com/marlonrichert/zsh-autocomplete.git $ZSH_CUSTOM/plugins/zsh-autocomplete +sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions zsh-syntax-highlighting zsh-autocomplete)/g' ~/.zshrc +echo "export LD_LIBRARY_PATH=/opt/conda/lib:\$LD_LIBRARY_PATH" >> ~/.zshrc +echo "export LD_LIBRARY_PATH=/opt/conda/lib:\$LD_LIBRARY_PATH" >> ~/.bashrc + +/opt/venv/bin/pip install --upgrade pyright +/opt/venv/bin/pip install --upgrade "pytest" "anyio[trio]" +sed -e '1,/dependencies:/d' < resources/envs/dev.yaml | \ + sed 's/-//' | \ + xargs /opt/venv/bin/pip install +eval $(grep 'terravibes_packages=' < "scripts/setup_python_develop_env.sh") +for package in $terravibes_packages +do + /opt/venv/bin/pip install -e src/$package +done + +sudo mkdir -p /opt/terravibes/ops +sudo ln -sf $(pwd)/op_resources /opt/terravibes/ops/resources +sudo mkdir /app +sudo ln -sf $(pwd)/ops /app/ops +sudo ln -sf $(pwd)/workflows /app/workflows \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..214fd1ba --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.onnx filter=lfs diff=lfs merge=lfs -text +*.xls filter=lfs diff=lfs merge=lfs -text +*.kml filter=lfs diff=lfs merge=lfs -text +*.tif filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 97f7d813..bc3b9cba 100644 --- a/.gitignore +++ b/.gitignore @@ -122,12 +122,9 @@ resources/test/ !terravibes/vibe/lib/ .cspell/* -user-interface/package-lock.json 
-carbon/notebook/farmbeats/* -carbon/notebook/data/* .env.development test-output.xml /outputs/* -/op_resources/* +venv/ makeenv diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 00000000..b135b9cc --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,17 @@ +src= ["src"] +line-length = 100 +extend-include = ["*.ipynb"] + +[lint] +select = ["E", "F", "I", "W"] +ignore = ["E203"] +exclude = [".git", "__pycache__"] + +[lint.pycodestyle] +max-line-length = 100 + +[lint.per-file-ignores] +"__init__.py" = ["F401"] + +[format] +quote-style = "double" \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..3ac73b4b --- /dev/null +++ b/Makefile @@ -0,0 +1,199 @@ +.PHONY: help local clean revert-% revert clean-% local-% cluster set-image %-base + +SHELL = /bin/bash + +export PATH := $(HOME)/.config/farmvibes-ai:$(PATH) + +CACHE_DEPLOYMENT := terravibes-cache +ORCHESTRATOR_DEPLOYMENT := terravibes-orchestrator +REST_API_DEPLOYMENT := terravibes-rest-api +DATA_OPS_DEPLOYMENT := terravibes-data-ops +WORKER_DEPLOYMENT := terravibes-worker + +CACHE_REPO := farmai/terravibes/cache +ORCHESTRATOR_REPO := farmai/terravibes/api-orchestrator +REST_API_REPO := farmai/terravibes/api-orchestrator +DATA_OPS_REPO := farmai/terravibes/cache +WORKER_REPO := farmai/terravibes/worker + +CONTAINER_DEBUG_PORT := 5678 +REST_API_DEBUG_PORT := 5678 +ORCHESTRATOR_DEBUG_PORT := 5679 +CACHE_DEBUG_PORT := 5680 +WORKER_DEBUG_PORT := 5681 +DATA_OPS_DEBUG_PORT := 5682 + +CURRENT_CACHE_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(CACHE_DEPLOYMENT) -o jsonpath='{.status.replicas}') +CURRENT_REST_API_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(REST_API_DEPLOYMENT) -o jsonpath='{.status.replicas}') +CURRENT_ORCHESTRATOR_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(ORCHESTRATOR_DEPLOYMENT) -o jsonpath='{.status.replicas}') +CURRENT_DATA_OPS_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment 
$(DATA_OPS_DEPLOYMENT) -o jsonpath='{.status.replicas}') +CURRENT_WORKER_REPLICAS := $(shell env PATH=$(PATH) kubectl get deployment $(WORKER_DEPLOYMENT) -o jsonpath='{.status.replicas}') + +TAG := tmp-$(shell date +%s) +ROOT := $(shell git rev-parse --show-toplevel) + +build_cluster := env FARMVIBES_AI_IMAGE_PREFIX=terravibes- CONTAINER_REGISTRY_BASE=mcr.microsoft.com bash farmvibes-ai local setup +base_image_name := grep -oE 'FROM ([-a-zA-Z0-9@:%._\+~\#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~\#?&//=]*))' FILE | cut -d ' ' -f 2 + +define transform_image_name +$(shell docker ps | grep registry | rev | cut -d ' ' -f 1 | rev):5000/$(1) +endef + +help: ## Shows this help message + @echo -e This is the farmvibes.ai makefile. Supported targets are:\\n + @grep -E -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + +local: cluster local-rest-api local-cache local-worker local-orchestrator local-data-ops ## Builds all images locally and deploys them into the local farmvibes.ai cluster + [ -z $(WAIT_AT_THE_END) ] || kubectl delete pods -l backend=terravibes && \ + kubectl wait --for=condition=Available deployment --timeout=300s -l backend=terravibes + +revert: cluster revert-rest-api revert-cache revert-worker revert-orchestrator ## Reverts all images to the official version + +services-base: resources/docker/Dockerfile-services-base + @docker manifest inspect `$(subst FILE,$<,$(base_image_name))` || \ + az acr login -n `$(subst FILE,$<,$(base_image_name)) | cut -d / -f 1 | sed 's|.azurecr.io||g'` || \ + echo "Failed to log into container registry. Please perform an `az login` and try again" + +%-base: resources/docker/Dockerfile-% + @docker manifest inspect `$(subst FILE,$<,$(base_image_name))` || \ + az acr login -n `$(subst FILE,$<,$(base_image_name)) | cut -d / -f 1 | sed 's|.azurecr.io||g'` || \ + echo "Failed to log into container registry. 
Please perform an `az login` and try again" + +delete-%: + kubectl scale deployment $(subst delete-,,$@) --replicas=0 + kubectl delete pod --wait=true -l app=$(subst delete-,,$@) --grace-period=0 --force + kubectl rollout status deployment $(subst delete-,,$@) + +repo-%: + docker pull $(CONTAINER_REGISTRY_BASE)/$(subst repo-,,$@):$(FARMVIBES_AI_IMAGE_TAG) + +set-image: + kubectl set image deployment $(DEPLOYMENT) "*=$(IMAGE_FULL_REFERENCE)" + kubectl rollout status deployment $(DEPLOYMENT) + +set-registry-image: push-image + DEPLOYMENT=$(DEPLOYMENT) IMAGE_FULL_REFERENCE=$(call transform_image_name,$(IMAGE_FULL_REFERENCE)) make -C . set-image + +push-image: + docker tag $(IMAGE_FULL_REFERENCE) 127.0.0.1:5000/$(IMAGE_FULL_REFERENCE) + docker push 127.0.0.1:5000/$(IMAGE_FULL_REFERENCE) + +scale: + kubectl scale deployment $(DEPLOYMENT) --replicas=$(shell [ "$(REPLICAS)" ] && echo "$(REPLICAS)" || echo 1) + [ ! -z $(WAIT_AT_THE_END) ] || kubectl wait --for=condition=Available deployment --timeout=300s $(DEPLOYMENT) + +# Have to replace Xfrozen_modules=on with Xfrozen_modules=off in the deployment +disable-frozen-modules: + kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|Xfrozen_modules=on|Xfrozen_modules=off|g' | kubectl apply -f - + +add-debug-flag: + kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|\(\s\+-\)\(.*port=3000\)|\1\2\n\1 --debug|' | kubectl apply -f - + +add-debug-flag-agent: + kubectl get deployment $(DEPLOYMENT) -o yaml | sed 's|\(\s\+-\)\(.*port=3000\)|\1\2\n\1 debug.activate=true|' | kubectl apply -f - + +local-rest-api: cluster local-rest-api-orchestrator delete-$(REST_API_DEPLOYMENT) ## Builds and deploys a local REST API image (enabling debug) + DEPLOYMENT=$(REST_API_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(REST_API_REPO):$(TAG) $(MAKE) -C . set-registry-image + @kubectl get deployment $(REST_API_DEPLOYMENT) -o json | grep -v last | grep -qo -- --debug || DEPLOYMENT=$(REST_API_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag + DEPLOYMENT=$(REST_API_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules + DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=$(CURRENT_REST_API_REPLICAS) $(MAKE) scale + +revert-rest-api: cluster repo-$(REST_API_REPO) delete-$(REST_API_DEPLOYMENT) ## Reverts the REST API deployment to use the official image + DEPLOYMENT=$(REST_API_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(REST_API_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image + DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=$(CURRENT_REST_API_REPLICAS) make scale + +local-orchestrator: cluster local-rest-api-orchestrator delete-$(ORCHESTRATOR_DEPLOYMENT) ## Builds and deploys a local ORCHESTRATOR image (enabling debug) + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(ORCHESTRATOR_REPO):$(TAG) $(MAKE) -C . set-registry-image + @kubectl get deployment $(ORCHESTRATOR_DEPLOYMENT) -o json | grep -v last | grep -qo -- --debug || DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) $(MAKE) -C . add-debug-flag + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=$(CURRENT_ORCHESTRATOR_REPLICAS) $(MAKE) scale + +revert-orchestrator: cluster repo-$(ORCHESTRATOR_REPO) delete-$(ORCHESTRATOR_DEPLOYMENT) ## Reverts the ORCHESTRATOR deployment to use the official image + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(ORCHESTRATOR_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=$(CURRENT_ORCHESTRATOR_REPLICAS) make scale + +local-data-ops: cluster local-cache-repo delete-$(DATA_OPS_DEPLOYMENT) ## Builds and deploys a local data ops image (enabling debug) + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(DATA_OPS_REPO):$(TAG) $(MAKE) -C . set-registry-image + @kubectl get deployment $(DATA_OPS_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag-agent + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=$(CURRENT_DATA_OPS_REPLICAS) $(MAKE) scale + +revert-data-ops: cluster repo-$(DATA_OPS_REPO) delete-$(DATA_OPS_DEPLOYMENT) ## Reverts the data ops deployment to use the official image + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(DATA_OPS_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=$(CURRENT_DATA_OPS_REPLICAS) make scale + +local-worker: cluster local-worker-repo delete-$(WORKER_DEPLOYMENT) ## Builds and deploys a local WORKER image (enabling debug) + DEPLOYMENT=$(WORKER_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(WORKER_REPO):$(TAG) $(MAKE) -C . set-registry-image + DEPLOYMENT=$(WORKER_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules + DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=$(CURRENT_WORKER_REPLICAS) make scale + +revert-worker: cluster repo-$(WORKER_REPO) delete-$(WORKER_DEPLOYMENT) ## Reverts the WORKER deployment to use the official image + DEPLOYMENT=$(WORKER_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(WORKER_REPO):$(FARMVIBES_AI_IMAGE_TAG) make set-registry-image + DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=$(CURRENT_WORKER_REPLICAS) make scale + +local-cache: cluster local-cache-repo delete-$(CACHE_DEPLOYMENT) ## Builds and deploys a local CACHE image (enabling debug) + DEPLOYMENT=$(CACHE_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CACHE_REPO):$(TAG) $(MAKE) -C . set-registry-image + @kubectl get deployment $(CACHE_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(CACHE_DEPLOYMENT) $(MAKE) -C . add-debug-flag-agent + DEPLOYMENT=$(CACHE_DEPLOYMENT) $(MAKE) -C . 
disable-frozen-modules + DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=$(CURRENT_CACHE_REPLICAS) make scale + +revert-cache: cluster repo-$(CACHE_REPO) delete-$(CACHE_DEPLOYMENT) ## Reverts the CACHE deployment to use the official image + DEPLOYMENT=$(CACHE_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(CACHE_REPO):$(FARMVIBES_AI_IMAGE_TAG) make set-registry-image + DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=$(CURRENT_CACHE_REPLICAS) make scale + +local-rest-api-orchestrator: cluster services-base + $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) + docker build -t $(REST_API_REPO):$(TAG) -t $(ORCHESTRATOR_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-api_orchestrator . + +local-cache-repo: cluster services-base + $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) + docker build -t $(CACHE_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-cache . + +local-worker-repo: cluster worker-base + $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) + docker build -t $(WORKER_REPO):$(TAG) -f $(ROOT)/resources/docker/Dockerfile-worker . + +debug-rest-api: cluster local-rest-api ## Starts listening to debug the REST API + DEPLOYMENT=$(REST_API_DEPLOYMENT) REPLICAS=1 make scale + kubectl port-forward deployments/$(REST_API_DEPLOYMENT) $(REST_API_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) + +debug-orchestrator: cluster local-orchestrator ## Starts listening to debug the ORCHESTRATOR + DEPLOYMENT=$(ORCHESTRATOR_DEPLOYMENT) REPLICAS=1 make scale + kubectl port-forward deployments/$(ORCHESTRATOR_DEPLOYMENT) $(ORCHESTRATOR_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) + +debug-worker: cluster local-worker ## Starts listening to debug the WORKER + @kubectl get deployment $(WORKER_DEPLOYMENT) -o json | grep -v last | grep -qo debug.activate || DEPLOYMENT=$(WORKER_DEPLOYMENT) $(MAKE) -C . 
add-debug-flag-agent + DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=1 make scale + kubectl port-forward pod/`kubectl get pods -l app=$(WORKER_DEPLOYMENT) --field-selector status.phase=Running | awk '/Running/{ print $$1 }'` \ + $(WORKER_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) + +debug-cache: cluster local-cache ## Starts listening to debug the CACHE + DEPLOYMENT=$(CACHE_DEPLOYMENT) REPLICAS=1 make scale + kubectl port-forward pod/`kubectl get pods -l app=$(CACHE_DEPLOYMENT) --field-selector status.phase=Running | awk '/Running/{ print $$1 }'` \ + $(CACHE_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) + +debug-data-ops: cluster local-data-ops ## Starts listening to debug the DATA_OPS + DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=1 make scale + kubectl port-forward deployments/$(DATA_OPS_DEPLOYMENT) $(DATA_OPS_DEBUG_PORT):$(CONTAINER_DEBUG_PORT) + +clean: cluster revert clean-worker clean-orchestrator clean-rest-api clean-cache + +clean-cache: cluster revert-cache revert-worker ## Cleans up the cache image from the local docker "registry" + docker images | grep -E "$(CACHE_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi + +clean-worker: cluster revert-cache revert-worker ## Cleans up the worker image from the local docker "registry" + docker images | grep -E "$(WORKER_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi + +clean-orchestrator: cluster revert-rest-api revert-orchestrator ## Cleans up the orchestrator image from the local docker "registry" + docker images | grep -E "$(ORCHESTRATOR_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi + +clean-data-ops: cluster revert-rest-api revert-data-ops ## Cleans up the data-ops image from the local docker "registry" + docker images | grep -E "$(DATA_OPS_REPO)\\s+tmp.*" | awk '{ print $$3 }' | xargs docker rmi + +clean-rest-api: cluster revert-rest-api revert-orchestrator ## Cleans up the orchestrator image from the local docker "registry" + docker images | grep -E "$(REST_API_REPO)\\s+tmp.*" | awk '{ print $$3 }' | 
xargs docker rmi + +cluster: + $(eval export PATH=$(HOME)/.config/farmvibes-ai:$(PATH)) + which k3d || $(build_cluster) + docker ps | grep -q farmvibes-ai || farmvibes-ai local start || $(build_cluster) diff --git a/op_resources/average_model/pixel_average_model.onnx b/op_resources/average_model/pixel_average_model.onnx new file mode 100644 index 00000000..5b009f3b --- /dev/null +++ b/op_resources/average_model/pixel_average_model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d789bd1b4f6eb3ace534e9ffd74ea6aaf95f141c1b614211d5f6cfd0a329151d +size 186 diff --git a/op_resources/cdl_metadata/CDL_codes_names_colors.xls b/op_resources/cdl_metadata/CDL_codes_names_colors.xls new file mode 100644 index 00000000..fd4a92ba --- /dev/null +++ b/op_resources/cdl_metadata/CDL_codes_names_colors.xls @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7f4adef7278f19db714da8cce766779d026cc65b858614ed4bc482de49ffe4 +size 70144 diff --git a/op_resources/cdl_metadata/us_continental.wkt b/op_resources/cdl_metadata/us_continental.wkt new file mode 100644 index 00000000..e8133628 --- /dev/null +++ b/op_resources/cdl_metadata/us_continental.wkt @@ -0,0 +1 @@ +POLYGON((-94.81758 49.38905, -94.64 48.84, -94.32914 48.67074, -93.63087 48.60926, -92.61 48.45, -91.64 48.14, -90.83 48.27, -89.6 48.01, -89.272917 48.019808, -88.378114 48.302918, -87.439793 47.94, -86.461991 47.553338, -85.652363 47.220219, -84.87608 46.900083, -84.779238 46.637102, -84.543749 46.538684, -84.6049 46.4396, -84.3367 46.40877, -84.14212 46.512226, -84.091851 46.275419, -83.890765 46.116927, -83.616131 46.116927, -83.469551 45.994686, -83.592851 45.816894, -82.550925 45.347517, -82.337763 44.44, -82.137642 43.571088, -82.43 42.98, -82.9 42.43, -83.12 42.08, -83.142 41.975681, -83.02981 41.832796, -82.690089 41.675105, -82.439278 41.675105, -81.277747 42.209026, -80.247448 42.3662, -78.939362 42.863611, -78.92 42.965, -79.01 43.27, -79.171674 43.466339, -78.72028 
43.625089, -77.737885 43.629056, -76.820034 43.628784, -76.5 44.018459, -76.375 44.09631, -75.31821 44.81645, -74.867 45.00048, -73.34783 45.00738, -71.50506 45.0082, -71.405 45.255, -71.08482 45.30524, -70.66 45.46, -70.305 45.915, -69.99997 46.69307, -69.237216 47.447781, -68.905 47.185, -68.23444 47.35486, -67.79046 47.06636, -67.79134 45.70281, -67.13741 45.13753, -66.96466 44.8097, -68.03252 44.3252, -69.06 43.98, -70.11617 43.68405, -70.645476 43.090238, -70.81489 42.8653, -70.825 42.335, -70.495 41.805, -70.08 41.78, -70.185 42.145, -69.88497 41.92283, -69.96503 41.63717, -70.64 41.475, -71.12039 41.49445, -71.86 41.32, -72.295 41.27, -72.87643 41.22065, -73.71 40.931102, -72.24126 41.11948, -71.945 40.93, -73.345 40.63, -73.982 40.628, -73.952325 40.75075, -74.25671 40.47351, -73.96244 40.42763, -74.17838 39.70926, -74.90604 38.93954, -74.98041 39.1964, -75.20002 39.24845, -75.52805 39.4985, -75.32 38.96, -75.071835 38.782032, -75.05673 38.40412, -75.37747 38.01551, -75.94023 37.21689, -76.03127 37.2566, -75.72205 37.93705, -76.23287 38.319215, -76.35 39.15, -76.542725 38.717615, -76.32933 38.08326, -76.989998 38.239992, -76.30162 37.917945, -76.25874 36.9664, -75.9718 36.89726, -75.86804 36.55125, -75.72749 35.55074, -76.36318 34.80854, -77.397635 34.51201, -78.05496 33.92547, -78.55435 33.86133, -79.06067 33.49395, -79.20357 33.15839, -80.301325 32.509355, -80.86498 32.0333, -81.33629 31.44049, -81.49042 30.72999, -81.31371 30.03552, -80.98 29.18, -80.535585 28.47213, -80.53 28.04, -80.056539 26.88, -80.088015 26.205765, -80.13156 25.816775, -80.38103 25.20616, -80.68 25.08, -81.17213 25.20126, -81.33 25.64, -81.71 25.87, -82.24 26.73, -82.70515 27.49504, -82.85526 27.88624, -82.65 28.55, -82.93 29.1, -83.70959 29.93656, -84.1 30.09, -85.10882 29.63615, -85.28784 29.68612, -85.7731 30.15261, -86.4 30.4, -87.53036 30.27433, -88.41782 30.3849, -89.18049 30.31598, -89.593831 30.159994, -89.413735 29.89419, -89.43 29.48864, -89.21767 29.29108, -89.40823 
29.15961, -89.77928 29.30714, -90.15463 29.11743, -90.880225 29.148535, -91.626785 29.677, -92.49906 29.5523, -93.22637 29.78375, -93.84842 29.71363, -94.69 29.48, -95.60026 28.73863, -96.59404 28.30748, -97.14 27.83, -97.37 27.38, -97.38 26.69, -97.33 26.21, -97.14 25.87, -97.53 25.84, -98.24 26.06, -99.02 26.37, -99.3 26.84, -99.52 27.54, -100.11 28.11, -100.45584 28.69612, -100.9576 29.38071, -101.6624 29.7793, -102.48 29.76, -103.11 28.97, -103.94 29.27, -104.45697 29.57196, -104.70575 30.12173, -105.03737 30.64402, -105.63159 31.08383, -106.1429 31.39995, -106.50759 31.75452, -108.24 31.754854, -108.24194 31.34222, -109.035 31.34194, -111.02361 31.33472, -113.30498 32.03914, -114.815 32.52528, -114.72139 32.72083, -115.99135 32.61239, -117.12776 32.53534, -117.295938 33.046225, -117.944 33.621236, -118.410602 33.740909, -118.519895 34.027782, -119.081 34.078, -119.438841 34.348477, -120.36778 34.44711, -120.62286 34.60855, -120.74433 35.15686, -121.71457 36.16153, -122.54747 37.55176, -122.51201 37.78339, -122.95319 38.11371, -123.7272 38.95166, -123.86517 39.76699, -124.39807 40.3132, -124.17886 41.14202, -124.2137 41.99964, -124.53284 42.76599, -124.14214 43.70838, -124.020535 44.615895, -123.89893 45.52341, -124.079635 46.86475, -124.39567 47.72017, -124.68721 48.184433, -124.566101 48.379715, -123.12 48.04, -122.58736 47.096, -122.34 47.36, -122.5 48.18, -122.84 49, -120 49, -117.03121 49, -116.04818 49, -113 49, -110.05 49, -107.05 49, -104.04826 48.99986, -100.65 49, -97.22872 49.0007, -95.15907 49, -95.15609 49.38425, -94.81758 49.38905)) diff --git a/op_resources/cloud_models/NOTICE.md b/op_resources/cloud_models/NOTICE.md new file mode 100644 index 00000000..f4d8bab6 --- /dev/null +++ b/op_resources/cloud_models/NOTICE.md @@ -0,0 +1,8 @@ +# NOTICE + +The models in this directory were converted to the ONNX format from the models +trained with data from the, and made available by, +[azavea cloud model](https://github.com/azavea/cloud-model). 
+ +The dataset is licensed under the terms of the +[Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/). \ No newline at end of file diff --git a/op_resources/cloud_models/cloud_model1_cpu.onnx b/op_resources/cloud_models/cloud_model1_cpu.onnx new file mode 100644 index 00000000..bd09b46d --- /dev/null +++ b/op_resources/cloud_models/cloud_model1_cpu.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45786f149f2cd0820846ad22336126a8a4d71d9e7d9cda02e0c3d58c3c5a1cd1 +size 56973507 diff --git a/op_resources/cloud_models/cloud_model2_cpu.onnx b/op_resources/cloud_models/cloud_model2_cpu.onnx new file mode 100644 index 00000000..f85f74ea --- /dev/null +++ b/op_resources/cloud_models/cloud_model2_cpu.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45de808bfa418d9fe31f7f4bdd656acdd20101fdc9219b901415e0e007e2db06 +size 56973507 diff --git a/op_resources/cloud_models/cloud_model3_cpu.onnx b/op_resources/cloud_models/cloud_model3_cpu.onnx new file mode 100644 index 00000000..cf9b2069 --- /dev/null +++ b/op_resources/cloud_models/cloud_model3_cpu.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed62734bde0250af30e0085a37c1b81615c3ab286469d5a4b2709963153c329 +size 26225 diff --git a/op_resources/cloud_models/cloud_model4_cpu.onnx b/op_resources/cloud_models/cloud_model4_cpu.onnx new file mode 100644 index 00000000..9b3eed2c --- /dev/null +++ b/op_resources/cloud_models/cloud_model4_cpu.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5638e37571e320e3625c163df2e250330d2f3d79e03216fc92f7899bbc909f +size 29269 diff --git a/op_resources/cloud_models/cloud_model5_cpu.onnx b/op_resources/cloud_models/cloud_model5_cpu.onnx new file mode 100644 index 00000000..4526178a --- /dev/null +++ b/op_resources/cloud_models/cloud_model5_cpu.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ad93ee817eb724405760130ffe368c6d76932a0d6c6a6407de03f1abd7995229 +size 29269 diff --git a/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx b/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx new file mode 100644 index 00000000..49868bcd --- /dev/null +++ b/op_resources/conservation_practices_models/terraces_grassed_waterways.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b275e771e6478a6bbc4a155adea7c55c9d1de7b7fb03fde4808efcece0f743 +size 97707605 diff --git a/op_resources/driveways_models/driveway.onnx b/op_resources/driveways_models/driveway.onnx new file mode 100644 index 00000000..29c0e554 --- /dev/null +++ b/op_resources/driveways_models/driveway.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07b7695ad6cff83e11a59570c670640f2b9914ce174906ea926f33ccde2726b +size 106619744 diff --git a/op_resources/glad_tile_geometry/10d_tiles.geojson b/op_resources/glad_tile_geometry/10d_tiles.geojson new file mode 100644 index 00000000..28e3f75b --- /dev/null +++ b/op_resources/glad_tile_geometry/10d_tiles.geojson @@ -0,0 +1,510 @@ +{ +"type": "FeatureCollection", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -50.0, "LR_X": -170.0, "LR_Y": -60.0, "NAME": "50S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -60.0 ], [ -180.0, -50.0 ], [ -170.0, -50.0 ], [ -170.0, -60.0 ], [ -180.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -40.0, "LR_X": -170.0, "LR_Y": -50.0, "NAME": "40S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -50.0 ], [ -180.0, -40.0 ], [ -170.0, -40.0 ], [ -170.0, -50.0 ], [ -180.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -30.0, "LR_X": -170.0, "LR_Y": -40.0, "NAME": "30S_180W" }, "geometry": { "type": 
"Polygon", "coordinates": [ [ [ -180.0, -40.0 ], [ -180.0, -30.0 ], [ -170.0, -30.0 ], [ -170.0, -40.0 ], [ -180.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -20.0, "LR_X": -170.0, "LR_Y": -30.0, "NAME": "20S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -30.0 ], [ -180.0, -20.0 ], [ -170.0, -20.0 ], [ -170.0, -30.0 ], [ -180.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": -10.0, "LR_X": -170.0, "LR_Y": -20.0, "NAME": "10S_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -20.0 ], [ -180.0, -10.0 ], [ -170.0, -10.0 ], [ -170.0, -20.0 ], [ -180.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 0.0, "LR_X": -170.0, "LR_Y": -10.0, "NAME": "00N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, -10.0 ], [ -180.0, 0.0 ], [ -170.0, 0.0 ], [ -170.0, -10.0 ], [ -180.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 10.0, "LR_X": -170.0, "LR_Y": 0.0, "NAME": "10N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 0.0 ], [ -180.0, 10.0 ], [ -170.0, 10.0 ], [ -170.0, 0.0 ], [ -180.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 20.0, "LR_X": -170.0, "LR_Y": 10.0, "NAME": "20N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 10.0 ], [ -180.0, 20.0 ], [ -170.0, 20.0 ], [ -170.0, 10.0 ], [ -180.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 30.0, "LR_X": -170.0, "LR_Y": 20.0, "NAME": "30N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 20.0 ], [ -180.0, 30.0 ], [ -170.0, 30.0 ], [ -170.0, 20.0 ], [ -180.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 40.0, "LR_X": -170.0, "LR_Y": 30.0, "NAME": "40N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 30.0 ], [ -180.0, 40.0 ], [ -170.0, 40.0 ], [ -170.0, 
30.0 ], [ -180.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 50.0, "LR_X": -170.0, "LR_Y": 40.0, "NAME": "50N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 40.0 ], [ -180.0, 50.0 ], [ -170.0, 50.0 ], [ -170.0, 40.0 ], [ -180.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 60.0, "LR_X": -170.0, "LR_Y": 50.0, "NAME": "60N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 50.0 ], [ -180.0, 60.0 ], [ -170.0, 60.0 ], [ -170.0, 50.0 ], [ -180.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 70.0, "LR_X": -170.0, "LR_Y": 60.0, "NAME": "70N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 60.0 ], [ -180.0, 70.0 ], [ -170.0, 70.0 ], [ -170.0, 60.0 ], [ -180.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -180.0, "UL_Y": 80.0, "LR_X": -170.0, "LR_Y": 70.0, "NAME": "80N_180W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -180.0, 70.0 ], [ -180.0, 80.0 ], [ -170.0, 80.0 ], [ -170.0, 70.0 ], [ -180.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -50.0, "LR_X": -160.0, "LR_Y": -60.0, "NAME": "50S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -60.0 ], [ -170.0, -50.0 ], [ -160.0, -50.0 ], [ -160.0, -60.0 ], [ -170.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -40.0, "LR_X": -160.0, "LR_Y": -50.0, "NAME": "40S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -50.0 ], [ -170.0, -40.0 ], [ -160.0, -40.0 ], [ -160.0, -50.0 ], [ -170.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -30.0, "LR_X": -160.0, "LR_Y": -40.0, "NAME": "30S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -40.0 ], [ -170.0, -30.0 ], [ -160.0, -30.0 ], [ -160.0, -40.0 ], [ -170.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, 
"UL_Y": -20.0, "LR_X": -160.0, "LR_Y": -30.0, "NAME": "20S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -30.0 ], [ -170.0, -20.0 ], [ -160.0, -20.0 ], [ -160.0, -30.0 ], [ -170.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": -10.0, "LR_X": -160.0, "LR_Y": -20.0, "NAME": "10S_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -20.0 ], [ -170.0, -10.0 ], [ -160.0, -10.0 ], [ -160.0, -20.0 ], [ -170.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 0.0, "LR_X": -160.0, "LR_Y": -10.0, "NAME": "00N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, -10.0 ], [ -170.0, 0.0 ], [ -160.0, 0.0 ], [ -160.0, -10.0 ], [ -170.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 10.0, "LR_X": -160.0, "LR_Y": 0.0, "NAME": "10N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 0.0 ], [ -170.0, 10.0 ], [ -160.0, 10.0 ], [ -160.0, 0.0 ], [ -170.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 20.0, "LR_X": -160.0, "LR_Y": 10.0, "NAME": "20N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 10.0 ], [ -170.0, 20.0 ], [ -160.0, 20.0 ], [ -160.0, 10.0 ], [ -170.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 30.0, "LR_X": -160.0, "LR_Y": 20.0, "NAME": "30N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 20.0 ], [ -170.0, 30.0 ], [ -160.0, 30.0 ], [ -160.0, 20.0 ], [ -170.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 40.0, "LR_X": -160.0, "LR_Y": 30.0, "NAME": "40N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 30.0 ], [ -170.0, 40.0 ], [ -160.0, 40.0 ], [ -160.0, 30.0 ], [ -170.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 50.0, "LR_X": -160.0, "LR_Y": 40.0, "NAME": "50N_170W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -170.0, 40.0 ], [ -170.0, 50.0 ], [ -160.0, 50.0 ], [ -160.0, 40.0 ], [ -170.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 60.0, "LR_X": -160.0, "LR_Y": 50.0, "NAME": "60N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 50.0 ], [ -170.0, 60.0 ], [ -160.0, 60.0 ], [ -160.0, 50.0 ], [ -170.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 70.0, "LR_X": -160.0, "LR_Y": 60.0, "NAME": "70N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 60.0 ], [ -170.0, 70.0 ], [ -160.0, 70.0 ], [ -160.0, 60.0 ], [ -170.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -170.0, "UL_Y": 80.0, "LR_X": -160.0, "LR_Y": 70.0, "NAME": "80N_170W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -170.0, 70.0 ], [ -170.0, 80.0 ], [ -160.0, 80.0 ], [ -160.0, 70.0 ], [ -170.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -50.0, "LR_X": -150.0, "LR_Y": -60.0, "NAME": "50S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -60.0 ], [ -160.0, -50.0 ], [ -150.0, -50.0 ], [ -150.0, -60.0 ], [ -160.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -40.0, "LR_X": -150.0, "LR_Y": -50.0, "NAME": "40S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -50.0 ], [ -160.0, -40.0 ], [ -150.0, -40.0 ], [ -150.0, -50.0 ], [ -160.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -30.0, "LR_X": -150.0, "LR_Y": -40.0, "NAME": "30S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -40.0 ], [ -160.0, -30.0 ], [ -150.0, -30.0 ], [ -150.0, -40.0 ], [ -160.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -20.0, "LR_X": -150.0, "LR_Y": -30.0, "NAME": "20S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -30.0 ], [ -160.0, -20.0 ], [ -150.0, -20.0 ], [ -150.0, 
-30.0 ], [ -160.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": -10.0, "LR_X": -150.0, "LR_Y": -20.0, "NAME": "10S_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -20.0 ], [ -160.0, -10.0 ], [ -150.0, -10.0 ], [ -150.0, -20.0 ], [ -160.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 0.0, "LR_X": -150.0, "LR_Y": -10.0, "NAME": "00N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, -10.0 ], [ -160.0, 0.0 ], [ -150.0, 0.0 ], [ -150.0, -10.0 ], [ -160.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 10.0, "LR_X": -150.0, "LR_Y": 0.0, "NAME": "10N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 0.0 ], [ -160.0, 10.0 ], [ -150.0, 10.0 ], [ -150.0, 0.0 ], [ -160.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 20.0, "LR_X": -150.0, "LR_Y": 10.0, "NAME": "20N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 10.0 ], [ -160.0, 20.0 ], [ -150.0, 20.0 ], [ -150.0, 10.0 ], [ -160.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 30.0, "LR_X": -150.0, "LR_Y": 20.0, "NAME": "30N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 20.0 ], [ -160.0, 30.0 ], [ -150.0, 30.0 ], [ -150.0, 20.0 ], [ -160.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 40.0, "LR_X": -150.0, "LR_Y": 30.0, "NAME": "40N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 30.0 ], [ -160.0, 40.0 ], [ -150.0, 40.0 ], [ -150.0, 30.0 ], [ -160.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 50.0, "LR_X": -150.0, "LR_Y": 40.0, "NAME": "50N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 40.0 ], [ -160.0, 50.0 ], [ -150.0, 50.0 ], [ -150.0, 40.0 ], [ -160.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 60.0, 
"LR_X": -150.0, "LR_Y": 50.0, "NAME": "60N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 50.0 ], [ -160.0, 60.0 ], [ -150.0, 60.0 ], [ -150.0, 50.0 ], [ -160.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 70.0, "LR_X": -150.0, "LR_Y": 60.0, "NAME": "70N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 60.0 ], [ -160.0, 70.0 ], [ -150.0, 70.0 ], [ -150.0, 60.0 ], [ -160.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -160.0, "UL_Y": 80.0, "LR_X": -150.0, "LR_Y": 70.0, "NAME": "80N_160W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -160.0, 70.0 ], [ -160.0, 80.0 ], [ -150.0, 80.0 ], [ -150.0, 70.0 ], [ -160.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -50.0, "LR_X": -140.0, "LR_Y": -60.0, "NAME": "50S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -60.0 ], [ -150.0, -50.0 ], [ -140.0, -50.0 ], [ -140.0, -60.0 ], [ -150.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -40.0, "LR_X": -140.0, "LR_Y": -50.0, "NAME": "40S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -50.0 ], [ -150.0, -40.0 ], [ -140.0, -40.0 ], [ -140.0, -50.0 ], [ -150.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -30.0, "LR_X": -140.0, "LR_Y": -40.0, "NAME": "30S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -40.0 ], [ -150.0, -30.0 ], [ -140.0, -30.0 ], [ -140.0, -40.0 ], [ -150.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -20.0, "LR_X": -140.0, "LR_Y": -30.0, "NAME": "20S_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -30.0 ], [ -150.0, -20.0 ], [ -140.0, -20.0 ], [ -140.0, -30.0 ], [ -150.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": -10.0, "LR_X": -140.0, "LR_Y": -20.0, "NAME": "10S_150W" }, "geometry": { "type": 
"Polygon", "coordinates": [ [ [ -150.0, -20.0 ], [ -150.0, -10.0 ], [ -140.0, -10.0 ], [ -140.0, -20.0 ], [ -150.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 0.0, "LR_X": -140.0, "LR_Y": -10.0, "NAME": "00N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, -10.0 ], [ -150.0, 0.0 ], [ -140.0, 0.0 ], [ -140.0, -10.0 ], [ -150.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 10.0, "LR_X": -140.0, "LR_Y": 0.0, "NAME": "10N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 0.0 ], [ -150.0, 10.0 ], [ -140.0, 10.0 ], [ -140.0, 0.0 ], [ -150.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 20.0, "LR_X": -140.0, "LR_Y": 10.0, "NAME": "20N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 10.0 ], [ -150.0, 20.0 ], [ -140.0, 20.0 ], [ -140.0, 10.0 ], [ -150.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 30.0, "LR_X": -140.0, "LR_Y": 20.0, "NAME": "30N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 20.0 ], [ -150.0, 30.0 ], [ -140.0, 30.0 ], [ -140.0, 20.0 ], [ -150.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 40.0, "LR_X": -140.0, "LR_Y": 30.0, "NAME": "40N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 30.0 ], [ -150.0, 40.0 ], [ -140.0, 40.0 ], [ -140.0, 30.0 ], [ -150.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 50.0, "LR_X": -140.0, "LR_Y": 40.0, "NAME": "50N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 40.0 ], [ -150.0, 50.0 ], [ -140.0, 50.0 ], [ -140.0, 40.0 ], [ -150.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 60.0, "LR_X": -140.0, "LR_Y": 50.0, "NAME": "60N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 50.0 ], [ -150.0, 60.0 ], [ -140.0, 60.0 ], [ -140.0, 50.0 ], [ 
-150.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 70.0, "LR_X": -140.0, "LR_Y": 60.0, "NAME": "70N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 60.0 ], [ -150.0, 70.0 ], [ -140.0, 70.0 ], [ -140.0, 60.0 ], [ -150.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -150.0, "UL_Y": 80.0, "LR_X": -140.0, "LR_Y": 70.0, "NAME": "80N_150W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -150.0, 70.0 ], [ -150.0, 80.0 ], [ -140.0, 80.0 ], [ -140.0, 70.0 ], [ -150.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -50.0, "LR_X": -130.0, "LR_Y": -60.0, "NAME": "50S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -60.0 ], [ -140.0, -50.0 ], [ -130.0, -50.0 ], [ -130.0, -60.0 ], [ -140.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -40.0, "LR_X": -130.0, "LR_Y": -50.0, "NAME": "40S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -50.0 ], [ -140.0, -40.0 ], [ -130.0, -40.0 ], [ -130.0, -50.0 ], [ -140.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -30.0, "LR_X": -130.0, "LR_Y": -40.0, "NAME": "30S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -40.0 ], [ -140.0, -30.0 ], [ -130.0, -30.0 ], [ -130.0, -40.0 ], [ -140.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -20.0, "LR_X": -130.0, "LR_Y": -30.0, "NAME": "20S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -30.0 ], [ -140.0, -20.0 ], [ -130.0, -20.0 ], [ -130.0, -30.0 ], [ -140.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": -10.0, "LR_X": -130.0, "LR_Y": -20.0, "NAME": "10S_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -20.0 ], [ -140.0, -10.0 ], [ -130.0, -10.0 ], [ -130.0, -20.0 ], [ -140.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 
-140.0, "UL_Y": 0.0, "LR_X": -130.0, "LR_Y": -10.0, "NAME": "00N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, -10.0 ], [ -140.0, 0.0 ], [ -130.0, 0.0 ], [ -130.0, -10.0 ], [ -140.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 10.0, "LR_X": -130.0, "LR_Y": 0.0, "NAME": "10N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 0.0 ], [ -140.0, 10.0 ], [ -130.0, 10.0 ], [ -130.0, 0.0 ], [ -140.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 20.0, "LR_X": -130.0, "LR_Y": 10.0, "NAME": "20N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 10.0 ], [ -140.0, 20.0 ], [ -130.0, 20.0 ], [ -130.0, 10.0 ], [ -140.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 30.0, "LR_X": -130.0, "LR_Y": 20.0, "NAME": "30N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 20.0 ], [ -140.0, 30.0 ], [ -130.0, 30.0 ], [ -130.0, 20.0 ], [ -140.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 40.0, "LR_X": -130.0, "LR_Y": 30.0, "NAME": "40N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 30.0 ], [ -140.0, 40.0 ], [ -130.0, 40.0 ], [ -130.0, 30.0 ], [ -140.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 50.0, "LR_X": -130.0, "LR_Y": 40.0, "NAME": "50N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 40.0 ], [ -140.0, 50.0 ], [ -130.0, 50.0 ], [ -130.0, 40.0 ], [ -140.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 60.0, "LR_X": -130.0, "LR_Y": 50.0, "NAME": "60N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 50.0 ], [ -140.0, 60.0 ], [ -130.0, 60.0 ], [ -130.0, 50.0 ], [ -140.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 70.0, "LR_X": -130.0, "LR_Y": 60.0, "NAME": "70N_140W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -140.0, 60.0 ], [ -140.0, 70.0 ], [ -130.0, 70.0 ], [ -130.0, 60.0 ], [ -140.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -140.0, "UL_Y": 80.0, "LR_X": -130.0, "LR_Y": 70.0, "NAME": "80N_140W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -140.0, 70.0 ], [ -140.0, 80.0 ], [ -130.0, 80.0 ], [ -130.0, 70.0 ], [ -140.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -50.0, "LR_X": -120.0, "LR_Y": -60.0, "NAME": "50S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -60.0 ], [ -130.0, -50.0 ], [ -120.0, -50.0 ], [ -120.0, -60.0 ], [ -130.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -40.0, "LR_X": -120.0, "LR_Y": -50.0, "NAME": "40S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -50.0 ], [ -130.0, -40.0 ], [ -120.0, -40.0 ], [ -120.0, -50.0 ], [ -130.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -30.0, "LR_X": -120.0, "LR_Y": -40.0, "NAME": "30S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -40.0 ], [ -130.0, -30.0 ], [ -120.0, -30.0 ], [ -120.0, -40.0 ], [ -130.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -20.0, "LR_X": -120.0, "LR_Y": -30.0, "NAME": "20S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -30.0 ], [ -130.0, -20.0 ], [ -120.0, -20.0 ], [ -120.0, -30.0 ], [ -130.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": -10.0, "LR_X": -120.0, "LR_Y": -20.0, "NAME": "10S_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -20.0 ], [ -130.0, -10.0 ], [ -120.0, -10.0 ], [ -120.0, -20.0 ], [ -130.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 0.0, "LR_X": -120.0, "LR_Y": -10.0, "NAME": "00N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, -10.0 ], [ -130.0, 0.0 ], [ -120.0, 0.0 ], [ 
-120.0, -10.0 ], [ -130.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 10.0, "LR_X": -120.0, "LR_Y": 0.0, "NAME": "10N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 0.0 ], [ -130.0, 10.0 ], [ -120.0, 10.0 ], [ -120.0, 0.0 ], [ -130.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 20.0, "LR_X": -120.0, "LR_Y": 10.0, "NAME": "20N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 10.0 ], [ -130.0, 20.0 ], [ -120.0, 20.0 ], [ -120.0, 10.0 ], [ -130.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 30.0, "LR_X": -120.0, "LR_Y": 20.0, "NAME": "30N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 20.0 ], [ -130.0, 30.0 ], [ -120.0, 30.0 ], [ -120.0, 20.0 ], [ -130.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 40.0, "LR_X": -120.0, "LR_Y": 30.0, "NAME": "40N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 30.0 ], [ -130.0, 40.0 ], [ -120.0, 40.0 ], [ -120.0, 30.0 ], [ -130.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 50.0, "LR_X": -120.0, "LR_Y": 40.0, "NAME": "50N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 40.0 ], [ -130.0, 50.0 ], [ -120.0, 50.0 ], [ -120.0, 40.0 ], [ -130.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 60.0, "LR_X": -120.0, "LR_Y": 50.0, "NAME": "60N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 50.0 ], [ -130.0, 60.0 ], [ -120.0, 60.0 ], [ -120.0, 50.0 ], [ -130.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 70.0, "LR_X": -120.0, "LR_Y": 60.0, "NAME": "70N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 60.0 ], [ -130.0, 70.0 ], [ -120.0, 70.0 ], [ -120.0, 60.0 ], [ -130.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -130.0, "UL_Y": 80.0, 
"LR_X": -120.0, "LR_Y": 70.0, "NAME": "80N_130W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -130.0, 70.0 ], [ -130.0, 80.0 ], [ -120.0, 80.0 ], [ -120.0, 70.0 ], [ -130.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -50.0, "LR_X": -110.0, "LR_Y": -60.0, "NAME": "50S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -60.0 ], [ -120.0, -50.0 ], [ -110.0, -50.0 ], [ -110.0, -60.0 ], [ -120.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -40.0, "LR_X": -110.0, "LR_Y": -50.0, "NAME": "40S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -50.0 ], [ -120.0, -40.0 ], [ -110.0, -40.0 ], [ -110.0, -50.0 ], [ -120.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -30.0, "LR_X": -110.0, "LR_Y": -40.0, "NAME": "30S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -40.0 ], [ -120.0, -30.0 ], [ -110.0, -30.0 ], [ -110.0, -40.0 ], [ -120.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -20.0, "LR_X": -110.0, "LR_Y": -30.0, "NAME": "20S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -30.0 ], [ -120.0, -20.0 ], [ -110.0, -20.0 ], [ -110.0, -30.0 ], [ -120.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": -10.0, "LR_X": -110.0, "LR_Y": -20.0, "NAME": "10S_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -20.0 ], [ -120.0, -10.0 ], [ -110.0, -10.0 ], [ -110.0, -20.0 ], [ -120.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 0.0, "LR_X": -110.0, "LR_Y": -10.0, "NAME": "00N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, -10.0 ], [ -120.0, 0.0 ], [ -110.0, 0.0 ], [ -110.0, -10.0 ], [ -120.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 10.0, "LR_X": -110.0, "LR_Y": 0.0, "NAME": "10N_120W" }, "geometry": { "type": 
"Polygon", "coordinates": [ [ [ -120.0, 0.0 ], [ -120.0, 10.0 ], [ -110.0, 10.0 ], [ -110.0, 0.0 ], [ -120.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 20.0, "LR_X": -110.0, "LR_Y": 10.0, "NAME": "20N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 10.0 ], [ -120.0, 20.0 ], [ -110.0, 20.0 ], [ -110.0, 10.0 ], [ -120.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 30.0, "LR_X": -110.0, "LR_Y": 20.0, "NAME": "30N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 20.0 ], [ -120.0, 30.0 ], [ -110.0, 30.0 ], [ -110.0, 20.0 ], [ -120.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 40.0, "LR_X": -110.0, "LR_Y": 30.0, "NAME": "40N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 30.0 ], [ -120.0, 40.0 ], [ -110.0, 40.0 ], [ -110.0, 30.0 ], [ -120.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 50.0, "LR_X": -110.0, "LR_Y": 40.0, "NAME": "50N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 40.0 ], [ -120.0, 50.0 ], [ -110.0, 50.0 ], [ -110.0, 40.0 ], [ -120.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 60.0, "LR_X": -110.0, "LR_Y": 50.0, "NAME": "60N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 50.0 ], [ -120.0, 60.0 ], [ -110.0, 60.0 ], [ -110.0, 50.0 ], [ -120.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 70.0, "LR_X": -110.0, "LR_Y": 60.0, "NAME": "70N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 60.0 ], [ -120.0, 70.0 ], [ -110.0, 70.0 ], [ -110.0, 60.0 ], [ -120.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -120.0, "UL_Y": 80.0, "LR_X": -110.0, "LR_Y": 70.0, "NAME": "80N_120W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -120.0, 70.0 ], [ -120.0, 80.0 ], [ -110.0, 80.0 ], [ -110.0, 70.0 ], [ -120.0, 
70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -50.0, "LR_X": -100.0, "LR_Y": -60.0, "NAME": "50S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -60.0 ], [ -110.0, -50.0 ], [ -100.0, -50.0 ], [ -100.0, -60.0 ], [ -110.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -40.0, "LR_X": -100.0, "LR_Y": -50.0, "NAME": "40S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -50.0 ], [ -110.0, -40.0 ], [ -100.0, -40.0 ], [ -100.0, -50.0 ], [ -110.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -30.0, "LR_X": -100.0, "LR_Y": -40.0, "NAME": "30S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -40.0 ], [ -110.0, -30.0 ], [ -100.0, -30.0 ], [ -100.0, -40.0 ], [ -110.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -20.0, "LR_X": -100.0, "LR_Y": -30.0, "NAME": "20S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -30.0 ], [ -110.0, -20.0 ], [ -100.0, -20.0 ], [ -100.0, -30.0 ], [ -110.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": -10.0, "LR_X": -100.0, "LR_Y": -20.0, "NAME": "10S_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -20.0 ], [ -110.0, -10.0 ], [ -100.0, -10.0 ], [ -100.0, -20.0 ], [ -110.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 0.0, "LR_X": -100.0, "LR_Y": -10.0, "NAME": "00N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, -10.0 ], [ -110.0, 0.0 ], [ -100.0, 0.0 ], [ -100.0, -10.0 ], [ -110.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 10.0, "LR_X": -100.0, "LR_Y": 0.0, "NAME": "10N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 0.0 ], [ -110.0, 10.0 ], [ -100.0, 10.0 ], [ -100.0, 0.0 ], [ -110.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 
20.0, "LR_X": -100.0, "LR_Y": 10.0, "NAME": "20N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 10.0 ], [ -110.0, 20.0 ], [ -100.0, 20.0 ], [ -100.0, 10.0 ], [ -110.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 30.0, "LR_X": -100.0, "LR_Y": 20.0, "NAME": "30N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 20.0 ], [ -110.0, 30.0 ], [ -100.0, 30.0 ], [ -100.0, 20.0 ], [ -110.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 40.0, "LR_X": -100.0, "LR_Y": 30.0, "NAME": "40N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 30.0 ], [ -110.0, 40.0 ], [ -100.0, 40.0 ], [ -100.0, 30.0 ], [ -110.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 50.0, "LR_X": -100.0, "LR_Y": 40.0, "NAME": "50N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 40.0 ], [ -110.0, 50.0 ], [ -100.0, 50.0 ], [ -100.0, 40.0 ], [ -110.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 60.0, "LR_X": -100.0, "LR_Y": 50.0, "NAME": "60N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 50.0 ], [ -110.0, 60.0 ], [ -100.0, 60.0 ], [ -100.0, 50.0 ], [ -110.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 70.0, "LR_X": -100.0, "LR_Y": 60.0, "NAME": "70N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 60.0 ], [ -110.0, 70.0 ], [ -100.0, 70.0 ], [ -100.0, 60.0 ], [ -110.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -110.0, "UL_Y": 80.0, "LR_X": -100.0, "LR_Y": 70.0, "NAME": "80N_110W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -110.0, 70.0 ], [ -110.0, 80.0 ], [ -100.0, 80.0 ], [ -100.0, 70.0 ], [ -110.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -50.0, "LR_X": -90.0, "LR_Y": -60.0, "NAME": "50S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ 
[ [ -100.0, -60.0 ], [ -100.0, -50.0 ], [ -90.0, -50.0 ], [ -90.0, -60.0 ], [ -100.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -40.0, "LR_X": -90.0, "LR_Y": -50.0, "NAME": "40S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -50.0 ], [ -100.0, -40.0 ], [ -90.0, -40.0 ], [ -90.0, -50.0 ], [ -100.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -30.0, "LR_X": -90.0, "LR_Y": -40.0, "NAME": "30S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -40.0 ], [ -100.0, -30.0 ], [ -90.0, -30.0 ], [ -90.0, -40.0 ], [ -100.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -20.0, "LR_X": -90.0, "LR_Y": -30.0, "NAME": "20S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -30.0 ], [ -100.0, -20.0 ], [ -90.0, -20.0 ], [ -90.0, -30.0 ], [ -100.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": -10.0, "LR_X": -90.0, "LR_Y": -20.0, "NAME": "10S_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -20.0 ], [ -100.0, -10.0 ], [ -90.0, -10.0 ], [ -90.0, -20.0 ], [ -100.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 0.0, "LR_X": -90.0, "LR_Y": -10.0, "NAME": "00N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, -10.0 ], [ -100.0, 0.0 ], [ -90.0, 0.0 ], [ -90.0, -10.0 ], [ -100.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 10.0, "LR_X": -90.0, "LR_Y": 0.0, "NAME": "10N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 0.0 ], [ -100.0, 10.0 ], [ -90.0, 10.0 ], [ -90.0, 0.0 ], [ -100.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 20.0, "LR_X": -90.0, "LR_Y": 10.0, "NAME": "20N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 10.0 ], [ -100.0, 20.0 ], [ -90.0, 20.0 ], [ -90.0, 10.0 ], [ -100.0, 10.0 ] ] ] } }, +{ 
"type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 30.0, "LR_X": -90.0, "LR_Y": 20.0, "NAME": "30N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 20.0 ], [ -100.0, 30.0 ], [ -90.0, 30.0 ], [ -90.0, 20.0 ], [ -100.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 40.0, "LR_X": -90.0, "LR_Y": 30.0, "NAME": "40N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 30.0 ], [ -100.0, 40.0 ], [ -90.0, 40.0 ], [ -90.0, 30.0 ], [ -100.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 50.0, "LR_X": -90.0, "LR_Y": 40.0, "NAME": "50N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 40.0 ], [ -100.0, 50.0 ], [ -90.0, 50.0 ], [ -90.0, 40.0 ], [ -100.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 60.0, "LR_X": -90.0, "LR_Y": 50.0, "NAME": "60N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 50.0 ], [ -100.0, 60.0 ], [ -90.0, 60.0 ], [ -90.0, 50.0 ], [ -100.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 70.0, "LR_X": -90.0, "LR_Y": 60.0, "NAME": "70N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 60.0 ], [ -100.0, 70.0 ], [ -90.0, 70.0 ], [ -90.0, 60.0 ], [ -100.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -100.0, "UL_Y": 80.0, "LR_X": -90.0, "LR_Y": 70.0, "NAME": "80N_100W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -100.0, 70.0 ], [ -100.0, 80.0 ], [ -90.0, 80.0 ], [ -90.0, 70.0 ], [ -100.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -50.0, "LR_X": -80.0, "LR_Y": -60.0, "NAME": "50S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -60.0 ], [ -90.0, -50.0 ], [ -80.0, -50.0 ], [ -80.0, -60.0 ], [ -90.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -40.0, "LR_X": -80.0, "LR_Y": -50.0, "NAME": "40S_090W" }, 
"geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -50.0 ], [ -90.0, -40.0 ], [ -80.0, -40.0 ], [ -80.0, -50.0 ], [ -90.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -30.0, "LR_X": -80.0, "LR_Y": -40.0, "NAME": "30S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -40.0 ], [ -90.0, -30.0 ], [ -80.0, -30.0 ], [ -80.0, -40.0 ], [ -90.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -20.0, "LR_X": -80.0, "LR_Y": -30.0, "NAME": "20S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -30.0 ], [ -90.0, -20.0 ], [ -80.0, -20.0 ], [ -80.0, -30.0 ], [ -90.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": -10.0, "LR_X": -80.0, "LR_Y": -20.0, "NAME": "10S_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -20.0 ], [ -90.0, -10.0 ], [ -80.0, -10.0 ], [ -80.0, -20.0 ], [ -90.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 0.0, "LR_X": -80.0, "LR_Y": -10.0, "NAME": "00N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, -10.0 ], [ -90.0, 0.0 ], [ -80.0, 0.0 ], [ -80.0, -10.0 ], [ -90.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 10.0, "LR_X": -80.0, "LR_Y": 0.0, "NAME": "10N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 0.0 ], [ -90.0, 10.0 ], [ -80.0, 10.0 ], [ -80.0, 0.0 ], [ -90.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 20.0, "LR_X": -80.0, "LR_Y": 10.0, "NAME": "20N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 10.0 ], [ -90.0, 20.0 ], [ -80.0, 20.0 ], [ -80.0, 10.0 ], [ -90.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 30.0, "LR_X": -80.0, "LR_Y": 20.0, "NAME": "30N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 20.0 ], [ -90.0, 30.0 ], [ -80.0, 30.0 ], [ -80.0, 20.0 ], [ -90.0, 20.0 ] 
] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 40.0, "LR_X": -80.0, "LR_Y": 30.0, "NAME": "40N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 30.0 ], [ -90.0, 40.0 ], [ -80.0, 40.0 ], [ -80.0, 30.0 ], [ -90.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 50.0, "LR_X": -80.0, "LR_Y": 40.0, "NAME": "50N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 40.0 ], [ -90.0, 50.0 ], [ -80.0, 50.0 ], [ -80.0, 40.0 ], [ -90.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 60.0, "LR_X": -80.0, "LR_Y": 50.0, "NAME": "60N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 50.0 ], [ -90.0, 60.0 ], [ -80.0, 60.0 ], [ -80.0, 50.0 ], [ -90.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 70.0, "LR_X": -80.0, "LR_Y": 60.0, "NAME": "70N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 60.0 ], [ -90.0, 70.0 ], [ -80.0, 70.0 ], [ -80.0, 60.0 ], [ -90.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -90.0, "UL_Y": 80.0, "LR_X": -80.0, "LR_Y": 70.0, "NAME": "80N_090W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -90.0, 70.0 ], [ -90.0, 80.0 ], [ -80.0, 80.0 ], [ -80.0, 70.0 ], [ -90.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -50.0, "LR_X": -70.0, "LR_Y": -60.0, "NAME": "50S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -60.0 ], [ -80.0, -50.0 ], [ -70.0, -50.0 ], [ -70.0, -60.0 ], [ -80.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -40.0, "LR_X": -70.0, "LR_Y": -50.0, "NAME": "40S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -50.0 ], [ -80.0, -40.0 ], [ -70.0, -40.0 ], [ -70.0, -50.0 ], [ -80.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -30.0, "LR_X": -70.0, "LR_Y": -40.0, "NAME": "30S_080W" }, "geometry": { 
"type": "Polygon", "coordinates": [ [ [ -80.0, -40.0 ], [ -80.0, -30.0 ], [ -70.0, -30.0 ], [ -70.0, -40.0 ], [ -80.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -20.0, "LR_X": -70.0, "LR_Y": -30.0, "NAME": "20S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -30.0 ], [ -80.0, -20.0 ], [ -70.0, -20.0 ], [ -70.0, -30.0 ], [ -80.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": -10.0, "LR_X": -70.0, "LR_Y": -20.0, "NAME": "10S_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -20.0 ], [ -80.0, -10.0 ], [ -70.0, -10.0 ], [ -70.0, -20.0 ], [ -80.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 0.0, "LR_X": -70.0, "LR_Y": -10.0, "NAME": "00N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, -10.0 ], [ -80.0, 0.0 ], [ -70.0, 0.0 ], [ -70.0, -10.0 ], [ -80.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 10.0, "LR_X": -70.0, "LR_Y": 0.0, "NAME": "10N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 0.0 ], [ -80.0, 10.0 ], [ -70.0, 10.0 ], [ -70.0, 0.0 ], [ -80.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 20.0, "LR_X": -70.0, "LR_Y": 10.0, "NAME": "20N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 10.0 ], [ -80.0, 20.0 ], [ -70.0, 20.0 ], [ -70.0, 10.0 ], [ -80.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 30.0, "LR_X": -70.0, "LR_Y": 20.0, "NAME": "30N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 20.0 ], [ -80.0, 30.0 ], [ -70.0, 30.0 ], [ -70.0, 20.0 ], [ -80.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 40.0, "LR_X": -70.0, "LR_Y": 30.0, "NAME": "40N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 30.0 ], [ -80.0, 40.0 ], [ -70.0, 40.0 ], [ -70.0, 30.0 ], [ -80.0, 30.0 ] ] ] } }, +{ "type": 
"Feature", "properties": { "UL_X": -80.0, "UL_Y": 50.0, "LR_X": -70.0, "LR_Y": 40.0, "NAME": "50N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 40.0 ], [ -80.0, 50.0 ], [ -70.0, 50.0 ], [ -70.0, 40.0 ], [ -80.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 60.0, "LR_X": -70.0, "LR_Y": 50.0, "NAME": "60N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 50.0 ], [ -80.0, 60.0 ], [ -70.0, 60.0 ], [ -70.0, 50.0 ], [ -80.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 70.0, "LR_X": -70.0, "LR_Y": 60.0, "NAME": "70N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 60.0 ], [ -80.0, 70.0 ], [ -70.0, 70.0 ], [ -70.0, 60.0 ], [ -80.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -80.0, "UL_Y": 80.0, "LR_X": -70.0, "LR_Y": 70.0, "NAME": "80N_080W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -80.0, 70.0 ], [ -80.0, 80.0 ], [ -70.0, 80.0 ], [ -70.0, 70.0 ], [ -80.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -50.0, "LR_X": -60.0, "LR_Y": -60.0, "NAME": "50S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -60.0 ], [ -70.0, -50.0 ], [ -60.0, -50.0 ], [ -60.0, -60.0 ], [ -70.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -40.0, "LR_X": -60.0, "LR_Y": -50.0, "NAME": "40S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -50.0 ], [ -70.0, -40.0 ], [ -60.0, -40.0 ], [ -60.0, -50.0 ], [ -70.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -30.0, "LR_X": -60.0, "LR_Y": -40.0, "NAME": "30S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -40.0 ], [ -70.0, -30.0 ], [ -60.0, -30.0 ], [ -60.0, -40.0 ], [ -70.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -20.0, "LR_X": -60.0, "LR_Y": -30.0, "NAME": "20S_070W" }, "geometry": { "type": 
"Polygon", "coordinates": [ [ [ -70.0, -30.0 ], [ -70.0, -20.0 ], [ -60.0, -20.0 ], [ -60.0, -30.0 ], [ -70.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": -10.0, "LR_X": -60.0, "LR_Y": -20.0, "NAME": "10S_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -20.0 ], [ -70.0, -10.0 ], [ -60.0, -10.0 ], [ -60.0, -20.0 ], [ -70.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 0.0, "LR_X": -60.0, "LR_Y": -10.0, "NAME": "00N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, -10.0 ], [ -70.0, 0.0 ], [ -60.0, 0.0 ], [ -60.0, -10.0 ], [ -70.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 10.0, "LR_X": -60.0, "LR_Y": 0.0, "NAME": "10N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 0.0 ], [ -70.0, 10.0 ], [ -60.0, 10.0 ], [ -60.0, 0.0 ], [ -70.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 20.0, "LR_X": -60.0, "LR_Y": 10.0, "NAME": "20N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 10.0 ], [ -70.0, 20.0 ], [ -60.0, 20.0 ], [ -60.0, 10.0 ], [ -70.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 30.0, "LR_X": -60.0, "LR_Y": 20.0, "NAME": "30N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 20.0 ], [ -70.0, 30.0 ], [ -60.0, 30.0 ], [ -60.0, 20.0 ], [ -70.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 40.0, "LR_X": -60.0, "LR_Y": 30.0, "NAME": "40N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 30.0 ], [ -70.0, 40.0 ], [ -60.0, 40.0 ], [ -60.0, 30.0 ], [ -70.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 50.0, "LR_X": -60.0, "LR_Y": 40.0, "NAME": "50N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 40.0 ], [ -70.0, 50.0 ], [ -60.0, 50.0 ], [ -60.0, 40.0 ], [ -70.0, 40.0 ] ] ] } }, +{ "type": "Feature", 
"properties": { "UL_X": -70.0, "UL_Y": 60.0, "LR_X": -60.0, "LR_Y": 50.0, "NAME": "60N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 50.0 ], [ -70.0, 60.0 ], [ -60.0, 60.0 ], [ -60.0, 50.0 ], [ -70.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 70.0, "LR_X": -60.0, "LR_Y": 60.0, "NAME": "70N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 60.0 ], [ -70.0, 70.0 ], [ -60.0, 70.0 ], [ -60.0, 60.0 ], [ -70.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -70.0, "UL_Y": 80.0, "LR_X": -60.0, "LR_Y": 70.0, "NAME": "80N_070W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -70.0, 70.0 ], [ -70.0, 80.0 ], [ -60.0, 80.0 ], [ -60.0, 70.0 ], [ -70.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -50.0, "LR_X": -50.0, "LR_Y": -60.0, "NAME": "50S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -60.0 ], [ -60.0, -50.0 ], [ -50.0, -50.0 ], [ -50.0, -60.0 ], [ -60.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -40.0, "LR_X": -50.0, "LR_Y": -50.0, "NAME": "40S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -50.0 ], [ -60.0, -40.0 ], [ -50.0, -40.0 ], [ -50.0, -50.0 ], [ -60.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -30.0, "LR_X": -50.0, "LR_Y": -40.0, "NAME": "30S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -40.0 ], [ -60.0, -30.0 ], [ -50.0, -30.0 ], [ -50.0, -40.0 ], [ -60.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -20.0, "LR_X": -50.0, "LR_Y": -30.0, "NAME": "20S_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -30.0 ], [ -60.0, -20.0 ], [ -50.0, -20.0 ], [ -50.0, -30.0 ], [ -60.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": -10.0, "LR_X": -50.0, "LR_Y": -20.0, "NAME": "10S_060W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -60.0, -20.0 ], [ -60.0, -10.0 ], [ -50.0, -10.0 ], [ -50.0, -20.0 ], [ -60.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 0.0, "LR_X": -50.0, "LR_Y": -10.0, "NAME": "00N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, -10.0 ], [ -60.0, 0.0 ], [ -50.0, 0.0 ], [ -50.0, -10.0 ], [ -60.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 10.0, "LR_X": -50.0, "LR_Y": 0.0, "NAME": "10N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 0.0 ], [ -60.0, 10.0 ], [ -50.0, 10.0 ], [ -50.0, 0.0 ], [ -60.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 20.0, "LR_X": -50.0, "LR_Y": 10.0, "NAME": "20N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 10.0 ], [ -60.0, 20.0 ], [ -50.0, 20.0 ], [ -50.0, 10.0 ], [ -60.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 30.0, "LR_X": -50.0, "LR_Y": 20.0, "NAME": "30N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 20.0 ], [ -60.0, 30.0 ], [ -50.0, 30.0 ], [ -50.0, 20.0 ], [ -60.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 40.0, "LR_X": -50.0, "LR_Y": 30.0, "NAME": "40N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 30.0 ], [ -60.0, 40.0 ], [ -50.0, 40.0 ], [ -50.0, 30.0 ], [ -60.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 50.0, "LR_X": -50.0, "LR_Y": 40.0, "NAME": "50N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 40.0 ], [ -60.0, 50.0 ], [ -50.0, 50.0 ], [ -50.0, 40.0 ], [ -60.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 60.0, "LR_X": -50.0, "LR_Y": 50.0, "NAME": "60N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 50.0 ], [ -60.0, 60.0 ], [ -50.0, 60.0 ], [ -50.0, 50.0 ], [ -60.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { 
"UL_X": -60.0, "UL_Y": 70.0, "LR_X": -50.0, "LR_Y": 60.0, "NAME": "70N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 60.0 ], [ -60.0, 70.0 ], [ -50.0, 70.0 ], [ -50.0, 60.0 ], [ -60.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -60.0, "UL_Y": 80.0, "LR_X": -50.0, "LR_Y": 70.0, "NAME": "80N_060W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -60.0, 70.0 ], [ -60.0, 80.0 ], [ -50.0, 80.0 ], [ -50.0, 70.0 ], [ -60.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -50.0, "LR_X": -40.0, "LR_Y": -60.0, "NAME": "50S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -60.0 ], [ -50.0, -50.0 ], [ -40.0, -50.0 ], [ -40.0, -60.0 ], [ -50.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -40.0, "LR_X": -40.0, "LR_Y": -50.0, "NAME": "40S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -50.0 ], [ -50.0, -40.0 ], [ -40.0, -40.0 ], [ -40.0, -50.0 ], [ -50.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -30.0, "LR_X": -40.0, "LR_Y": -40.0, "NAME": "30S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -40.0 ], [ -50.0, -30.0 ], [ -40.0, -30.0 ], [ -40.0, -40.0 ], [ -50.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -20.0, "LR_X": -40.0, "LR_Y": -30.0, "NAME": "20S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -30.0 ], [ -50.0, -20.0 ], [ -40.0, -20.0 ], [ -40.0, -30.0 ], [ -50.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": -10.0, "LR_X": -40.0, "LR_Y": -20.0, "NAME": "10S_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, -20.0 ], [ -50.0, -10.0 ], [ -40.0, -10.0 ], [ -40.0, -20.0 ], [ -50.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 0.0, "LR_X": -40.0, "LR_Y": -10.0, "NAME": "00N_050W" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ -50.0, -10.0 ], [ -50.0, 0.0 ], [ -40.0, 0.0 ], [ -40.0, -10.0 ], [ -50.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 10.0, "LR_X": -40.0, "LR_Y": 0.0, "NAME": "10N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 0.0 ], [ -50.0, 10.0 ], [ -40.0, 10.0 ], [ -40.0, 0.0 ], [ -50.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 20.0, "LR_X": -40.0, "LR_Y": 10.0, "NAME": "20N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 10.0 ], [ -50.0, 20.0 ], [ -40.0, 20.0 ], [ -40.0, 10.0 ], [ -50.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 30.0, "LR_X": -40.0, "LR_Y": 20.0, "NAME": "30N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 20.0 ], [ -50.0, 30.0 ], [ -40.0, 30.0 ], [ -40.0, 20.0 ], [ -50.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 40.0, "LR_X": -40.0, "LR_Y": 30.0, "NAME": "40N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 30.0 ], [ -50.0, 40.0 ], [ -40.0, 40.0 ], [ -40.0, 30.0 ], [ -50.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 50.0, "LR_X": -40.0, "LR_Y": 40.0, "NAME": "50N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 40.0 ], [ -50.0, 50.0 ], [ -40.0, 50.0 ], [ -40.0, 40.0 ], [ -50.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 60.0, "LR_X": -40.0, "LR_Y": 50.0, "NAME": "60N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 50.0 ], [ -50.0, 60.0 ], [ -40.0, 60.0 ], [ -40.0, 50.0 ], [ -50.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -50.0, "UL_Y": 70.0, "LR_X": -40.0, "LR_Y": 60.0, "NAME": "70N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 60.0 ], [ -50.0, 70.0 ], [ -40.0, 70.0 ], [ -40.0, 60.0 ], [ -50.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 
-50.0, "UL_Y": 80.0, "LR_X": -40.0, "LR_Y": 70.0, "NAME": "80N_050W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -50.0, 70.0 ], [ -50.0, 80.0 ], [ -40.0, 80.0 ], [ -40.0, 70.0 ], [ -50.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -50.0, "LR_X": -30.0, "LR_Y": -60.0, "NAME": "50S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -60.0 ], [ -40.0, -50.0 ], [ -30.0, -50.0 ], [ -30.0, -60.0 ], [ -40.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -40.0, "LR_X": -30.0, "LR_Y": -50.0, "NAME": "40S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -50.0 ], [ -40.0, -40.0 ], [ -30.0, -40.0 ], [ -30.0, -50.0 ], [ -40.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -30.0, "LR_X": -30.0, "LR_Y": -40.0, "NAME": "30S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -40.0 ], [ -40.0, -30.0 ], [ -30.0, -30.0 ], [ -30.0, -40.0 ], [ -40.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -20.0, "LR_X": -30.0, "LR_Y": -30.0, "NAME": "20S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -30.0 ], [ -40.0, -20.0 ], [ -30.0, -20.0 ], [ -30.0, -30.0 ], [ -40.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": -10.0, "LR_X": -30.0, "LR_Y": -20.0, "NAME": "10S_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -20.0 ], [ -40.0, -10.0 ], [ -30.0, -10.0 ], [ -30.0, -20.0 ], [ -40.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 0.0, "LR_X": -30.0, "LR_Y": -10.0, "NAME": "00N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, -10.0 ], [ -40.0, 0.0 ], [ -30.0, 0.0 ], [ -30.0, -10.0 ], [ -40.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 10.0, "LR_X": -30.0, "LR_Y": 0.0, "NAME": "10N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ 
[ -40.0, 0.0 ], [ -40.0, 10.0 ], [ -30.0, 10.0 ], [ -30.0, 0.0 ], [ -40.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 20.0, "LR_X": -30.0, "LR_Y": 10.0, "NAME": "20N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 10.0 ], [ -40.0, 20.0 ], [ -30.0, 20.0 ], [ -30.0, 10.0 ], [ -40.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 30.0, "LR_X": -30.0, "LR_Y": 20.0, "NAME": "30N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 20.0 ], [ -40.0, 30.0 ], [ -30.0, 30.0 ], [ -30.0, 20.0 ], [ -40.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 40.0, "LR_X": -30.0, "LR_Y": 30.0, "NAME": "40N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 30.0 ], [ -40.0, 40.0 ], [ -30.0, 40.0 ], [ -30.0, 30.0 ], [ -40.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 50.0, "LR_X": -30.0, "LR_Y": 40.0, "NAME": "50N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 40.0 ], [ -40.0, 50.0 ], [ -30.0, 50.0 ], [ -30.0, 40.0 ], [ -40.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 60.0, "LR_X": -30.0, "LR_Y": 50.0, "NAME": "60N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 50.0 ], [ -40.0, 60.0 ], [ -30.0, 60.0 ], [ -30.0, 50.0 ], [ -40.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 70.0, "LR_X": -30.0, "LR_Y": 60.0, "NAME": "70N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 60.0 ], [ -40.0, 70.0 ], [ -30.0, 70.0 ], [ -30.0, 60.0 ], [ -40.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -40.0, "UL_Y": 80.0, "LR_X": -30.0, "LR_Y": 70.0, "NAME": "80N_040W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -40.0, 70.0 ], [ -40.0, 80.0 ], [ -30.0, 80.0 ], [ -30.0, 70.0 ], [ -40.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -50.0, 
"LR_X": -20.0, "LR_Y": -60.0, "NAME": "50S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -60.0 ], [ -30.0, -50.0 ], [ -20.0, -50.0 ], [ -20.0, -60.0 ], [ -30.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -40.0, "LR_X": -20.0, "LR_Y": -50.0, "NAME": "40S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -50.0 ], [ -30.0, -40.0 ], [ -20.0, -40.0 ], [ -20.0, -50.0 ], [ -30.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -30.0, "LR_X": -20.0, "LR_Y": -40.0, "NAME": "30S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -40.0 ], [ -30.0, -30.0 ], [ -20.0, -30.0 ], [ -20.0, -40.0 ], [ -30.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -20.0, "LR_X": -20.0, "LR_Y": -30.0, "NAME": "20S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -30.0 ], [ -30.0, -20.0 ], [ -20.0, -20.0 ], [ -20.0, -30.0 ], [ -30.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": -10.0, "LR_X": -20.0, "LR_Y": -20.0, "NAME": "10S_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -20.0 ], [ -30.0, -10.0 ], [ -20.0, -10.0 ], [ -20.0, -20.0 ], [ -30.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 0.0, "LR_X": -20.0, "LR_Y": -10.0, "NAME": "00N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, -10.0 ], [ -30.0, 0.0 ], [ -20.0, 0.0 ], [ -20.0, -10.0 ], [ -30.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 10.0, "LR_X": -20.0, "LR_Y": 0.0, "NAME": "10N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 0.0 ], [ -30.0, 10.0 ], [ -20.0, 10.0 ], [ -20.0, 0.0 ], [ -30.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 20.0, "LR_X": -20.0, "LR_Y": 10.0, "NAME": "20N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 10.0 ], [ 
-30.0, 20.0 ], [ -20.0, 20.0 ], [ -20.0, 10.0 ], [ -30.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 30.0, "LR_X": -20.0, "LR_Y": 20.0, "NAME": "30N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 20.0 ], [ -30.0, 30.0 ], [ -20.0, 30.0 ], [ -20.0, 20.0 ], [ -30.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 40.0, "LR_X": -20.0, "LR_Y": 30.0, "NAME": "40N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 30.0 ], [ -30.0, 40.0 ], [ -20.0, 40.0 ], [ -20.0, 30.0 ], [ -30.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 50.0, "LR_X": -20.0, "LR_Y": 40.0, "NAME": "50N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 40.0 ], [ -30.0, 50.0 ], [ -20.0, 50.0 ], [ -20.0, 40.0 ], [ -30.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 60.0, "LR_X": -20.0, "LR_Y": 50.0, "NAME": "60N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 50.0 ], [ -30.0, 60.0 ], [ -20.0, 60.0 ], [ -20.0, 50.0 ], [ -30.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 70.0, "LR_X": -20.0, "LR_Y": 60.0, "NAME": "70N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 60.0 ], [ -30.0, 70.0 ], [ -20.0, 70.0 ], [ -20.0, 60.0 ], [ -30.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -30.0, "UL_Y": 80.0, "LR_X": -20.0, "LR_Y": 70.0, "NAME": "80N_030W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -30.0, 70.0 ], [ -30.0, 80.0 ], [ -20.0, 80.0 ], [ -20.0, 70.0 ], [ -30.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -50.0, "LR_X": -10.0, "LR_Y": -60.0, "NAME": "50S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -60.0 ], [ -20.0, -50.0 ], [ -10.0, -50.0 ], [ -10.0, -60.0 ], [ -20.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -40.0, "LR_X": 
-10.0, "LR_Y": -50.0, "NAME": "40S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -50.0 ], [ -20.0, -40.0 ], [ -10.0, -40.0 ], [ -10.0, -50.0 ], [ -20.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -30.0, "LR_X": -10.0, "LR_Y": -40.0, "NAME": "30S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -40.0 ], [ -20.0, -30.0 ], [ -10.0, -30.0 ], [ -10.0, -40.0 ], [ -20.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -20.0, "LR_X": -10.0, "LR_Y": -30.0, "NAME": "20S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -30.0 ], [ -20.0, -20.0 ], [ -10.0, -20.0 ], [ -10.0, -30.0 ], [ -20.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": -10.0, "LR_X": -10.0, "LR_Y": -20.0, "NAME": "10S_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -20.0 ], [ -20.0, -10.0 ], [ -10.0, -10.0 ], [ -10.0, -20.0 ], [ -20.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 0.0, "LR_X": -10.0, "LR_Y": -10.0, "NAME": "00N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, -10.0 ], [ -20.0, 0.0 ], [ -10.0, 0.0 ], [ -10.0, -10.0 ], [ -20.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 10.0, "LR_X": -10.0, "LR_Y": 0.0, "NAME": "10N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 0.0 ], [ -20.0, 10.0 ], [ -10.0, 10.0 ], [ -10.0, 0.0 ], [ -20.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 20.0, "LR_X": -10.0, "LR_Y": 10.0, "NAME": "20N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 10.0 ], [ -20.0, 20.0 ], [ -10.0, 20.0 ], [ -10.0, 10.0 ], [ -20.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 30.0, "LR_X": -10.0, "LR_Y": 20.0, "NAME": "30N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 20.0 ], [ -20.0, 30.0 ], [ 
-10.0, 30.0 ], [ -10.0, 20.0 ], [ -20.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 40.0, "LR_X": -10.0, "LR_Y": 30.0, "NAME": "40N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 30.0 ], [ -20.0, 40.0 ], [ -10.0, 40.0 ], [ -10.0, 30.0 ], [ -20.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 50.0, "LR_X": -10.0, "LR_Y": 40.0, "NAME": "50N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 40.0 ], [ -20.0, 50.0 ], [ -10.0, 50.0 ], [ -10.0, 40.0 ], [ -20.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 60.0, "LR_X": -10.0, "LR_Y": 50.0, "NAME": "60N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 50.0 ], [ -20.0, 60.0 ], [ -10.0, 60.0 ], [ -10.0, 50.0 ], [ -20.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 70.0, "LR_X": -10.0, "LR_Y": 60.0, "NAME": "70N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 60.0 ], [ -20.0, 70.0 ], [ -10.0, 70.0 ], [ -10.0, 60.0 ], [ -20.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -20.0, "UL_Y": 80.0, "LR_X": -10.0, "LR_Y": 70.0, "NAME": "80N_020W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -20.0, 70.0 ], [ -20.0, 80.0 ], [ -10.0, 80.0 ], [ -10.0, 70.0 ], [ -20.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -50.0, "LR_X": 0.0, "LR_Y": -60.0, "NAME": "50S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -60.0 ], [ -10.0, -50.0 ], [ 0.0, -50.0 ], [ 0.0, -60.0 ], [ -10.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -40.0, "LR_X": 0.0, "LR_Y": -50.0, "NAME": "40S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -50.0 ], [ -10.0, -40.0 ], [ 0.0, -40.0 ], [ 0.0, -50.0 ], [ -10.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -30.0, "LR_X": 0.0, "LR_Y": -40.0, 
"NAME": "30S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -40.0 ], [ -10.0, -30.0 ], [ 0.0, -30.0 ], [ 0.0, -40.0 ], [ -10.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -20.0, "LR_X": 0.0, "LR_Y": -30.0, "NAME": "20S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -30.0 ], [ -10.0, -20.0 ], [ 0.0, -20.0 ], [ 0.0, -30.0 ], [ -10.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": -10.0, "LR_X": 0.0, "LR_Y": -20.0, "NAME": "10S_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -20.0 ], [ -10.0, -10.0 ], [ 0.0, -10.0 ], [ 0.0, -20.0 ], [ -10.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 0.0, "LR_X": 0.0, "LR_Y": -10.0, "NAME": "00N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, -10.0 ], [ -10.0, 0.0 ], [ 0.0, 0.0 ], [ 0.0, -10.0 ], [ -10.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 10.0, "LR_X": 0.0, "LR_Y": 0.0, "NAME": "10N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 0.0 ], [ -10.0, 10.0 ], [ 0.0, 10.0 ], [ 0.0, 0.0 ], [ -10.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 20.0, "LR_X": 0.0, "LR_Y": 10.0, "NAME": "20N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 10.0 ], [ -10.0, 20.0 ], [ 0.0, 20.0 ], [ 0.0, 10.0 ], [ -10.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 30.0, "LR_X": 0.0, "LR_Y": 20.0, "NAME": "30N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 20.0 ], [ -10.0, 30.0 ], [ 0.0, 30.0 ], [ 0.0, 20.0 ], [ -10.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 40.0, "LR_X": 0.0, "LR_Y": 30.0, "NAME": "40N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 30.0 ], [ -10.0, 40.0 ], [ 0.0, 40.0 ], [ 0.0, 30.0 ], [ -10.0, 30.0 ] ] ] } }, +{ "type": "Feature", 
"properties": { "UL_X": -10.0, "UL_Y": 50.0, "LR_X": 0.0, "LR_Y": 40.0, "NAME": "50N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 40.0 ], [ -10.0, 50.0 ], [ 0.0, 50.0 ], [ 0.0, 40.0 ], [ -10.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 60.0, "LR_X": 0.0, "LR_Y": 50.0, "NAME": "60N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 50.0 ], [ -10.0, 60.0 ], [ 0.0, 60.0 ], [ 0.0, 50.0 ], [ -10.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 70.0, "LR_X": 0.0, "LR_Y": 60.0, "NAME": "70N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 60.0 ], [ -10.0, 70.0 ], [ 0.0, 70.0 ], [ 0.0, 60.0 ], [ -10.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": -10.0, "UL_Y": 80.0, "LR_X": 0.0, "LR_Y": 70.0, "NAME": "80N_010W" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -10.0, 70.0 ], [ -10.0, 80.0 ], [ 0.0, 80.0 ], [ 0.0, 70.0 ], [ -10.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -50.0, "LR_X": 10.0, "LR_Y": -60.0, "NAME": "50S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -60.0 ], [ 0.0, -50.0 ], [ 10.0, -50.0 ], [ 10.0, -60.0 ], [ 0.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -40.0, "LR_X": 10.0, "LR_Y": -50.0, "NAME": "40S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -50.0 ], [ 0.0, -40.0 ], [ 10.0, -40.0 ], [ 10.0, -50.0 ], [ 0.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -30.0, "LR_X": 10.0, "LR_Y": -40.0, "NAME": "30S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -40.0 ], [ 0.0, -30.0 ], [ 10.0, -30.0 ], [ 10.0, -40.0 ], [ 0.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -20.0, "LR_X": 10.0, "LR_Y": -30.0, "NAME": "20S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -30.0 ], [ 0.0, -20.0 ], [ 10.0, -20.0 
], [ 10.0, -30.0 ], [ 0.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": -10.0, "LR_X": 10.0, "LR_Y": -20.0, "NAME": "10S_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -20.0 ], [ 0.0, -10.0 ], [ 10.0, -10.0 ], [ 10.0, -20.0 ], [ 0.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 0.0, "LR_X": 10.0, "LR_Y": -10.0, "NAME": "00N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, -10.0 ], [ 0.0, 0.0 ], [ 10.0, 0.0 ], [ 10.0, -10.0 ], [ 0.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 10.0, "LR_X": 10.0, "LR_Y": 0.0, "NAME": "10N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 0.0, 10.0 ], [ 10.0, 10.0 ], [ 10.0, 0.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 20.0, "LR_X": 10.0, "LR_Y": 10.0, "NAME": "20N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 10.0 ], [ 0.0, 20.0 ], [ 10.0, 20.0 ], [ 10.0, 10.0 ], [ 0.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 30.0, "LR_X": 10.0, "LR_Y": 20.0, "NAME": "30N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 20.0 ], [ 0.0, 30.0 ], [ 10.0, 30.0 ], [ 10.0, 20.0 ], [ 0.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 40.0, "LR_X": 10.0, "LR_Y": 30.0, "NAME": "40N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 30.0 ], [ 0.0, 40.0 ], [ 10.0, 40.0 ], [ 10.0, 30.0 ], [ 0.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 50.0, "LR_X": 10.0, "LR_Y": 40.0, "NAME": "50N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 40.0 ], [ 0.0, 50.0 ], [ 10.0, 50.0 ], [ 10.0, 40.0 ], [ 0.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 60.0, "LR_X": 10.0, "LR_Y": 50.0, "NAME": "60N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 50.0 ], [ 
0.0, 60.0 ], [ 10.0, 60.0 ], [ 10.0, 50.0 ], [ 0.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 70.0, "LR_X": 10.0, "LR_Y": 60.0, "NAME": "70N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 60.0 ], [ 0.0, 70.0 ], [ 10.0, 70.0 ], [ 10.0, 60.0 ], [ 0.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 0.0, "UL_Y": 80.0, "LR_X": 10.0, "LR_Y": 70.0, "NAME": "80N_000E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 70.0 ], [ 0.0, 80.0 ], [ 10.0, 80.0 ], [ 10.0, 70.0 ], [ 0.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -50.0, "LR_X": 20.0, "LR_Y": -60.0, "NAME": "50S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -60.0 ], [ 10.0, -50.0 ], [ 20.0, -50.0 ], [ 20.0, -60.0 ], [ 10.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -40.0, "LR_X": 20.0, "LR_Y": -50.0, "NAME": "40S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -50.0 ], [ 10.0, -40.0 ], [ 20.0, -40.0 ], [ 20.0, -50.0 ], [ 10.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -30.0, "LR_X": 20.0, "LR_Y": -40.0, "NAME": "30S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -40.0 ], [ 10.0, -30.0 ], [ 20.0, -30.0 ], [ 20.0, -40.0 ], [ 10.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -20.0, "LR_X": 20.0, "LR_Y": -30.0, "NAME": "20S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -30.0 ], [ 10.0, -20.0 ], [ 20.0, -20.0 ], [ 20.0, -30.0 ], [ 10.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": -10.0, "LR_X": 20.0, "LR_Y": -20.0, "NAME": "10S_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -20.0 ], [ 10.0, -10.0 ], [ 20.0, -10.0 ], [ 20.0, -20.0 ], [ 10.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 0.0, "LR_X": 20.0, "LR_Y": -10.0, "NAME": "00N_010E" 
}, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, -10.0 ], [ 10.0, 0.0 ], [ 20.0, 0.0 ], [ 20.0, -10.0 ], [ 10.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 10.0, "LR_X": 20.0, "LR_Y": 0.0, "NAME": "10N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 0.0 ], [ 10.0, 10.0 ], [ 20.0, 10.0 ], [ 20.0, 0.0 ], [ 10.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 20.0, "LR_X": 20.0, "LR_Y": 10.0, "NAME": "20N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 10.0 ], [ 10.0, 20.0 ], [ 20.0, 20.0 ], [ 20.0, 10.0 ], [ 10.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 30.0, "LR_X": 20.0, "LR_Y": 20.0, "NAME": "30N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 20.0 ], [ 10.0, 30.0 ], [ 20.0, 30.0 ], [ 20.0, 20.0 ], [ 10.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 40.0, "LR_X": 20.0, "LR_Y": 30.0, "NAME": "40N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 30.0 ], [ 10.0, 40.0 ], [ 20.0, 40.0 ], [ 20.0, 30.0 ], [ 10.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 50.0, "LR_X": 20.0, "LR_Y": 40.0, "NAME": "50N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 40.0 ], [ 10.0, 50.0 ], [ 20.0, 50.0 ], [ 20.0, 40.0 ], [ 10.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 60.0, "LR_X": 20.0, "LR_Y": 50.0, "NAME": "60N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 50.0 ], [ 10.0, 60.0 ], [ 20.0, 60.0 ], [ 20.0, 50.0 ], [ 10.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 70.0, "LR_X": 20.0, "LR_Y": 60.0, "NAME": "70N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 60.0 ], [ 10.0, 70.0 ], [ 20.0, 70.0 ], [ 20.0, 60.0 ], [ 10.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 10.0, "UL_Y": 80.0, 
"LR_X": 20.0, "LR_Y": 70.0, "NAME": "80N_010E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 10.0, 70.0 ], [ 10.0, 80.0 ], [ 20.0, 80.0 ], [ 20.0, 70.0 ], [ 10.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -50.0, "LR_X": 30.0, "LR_Y": -60.0, "NAME": "50S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -60.0 ], [ 20.0, -50.0 ], [ 30.0, -50.0 ], [ 30.0, -60.0 ], [ 20.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -40.0, "LR_X": 30.0, "LR_Y": -50.0, "NAME": "40S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -50.0 ], [ 20.0, -40.0 ], [ 30.0, -40.0 ], [ 30.0, -50.0 ], [ 20.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -30.0, "LR_X": 30.0, "LR_Y": -40.0, "NAME": "30S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -40.0 ], [ 20.0, -30.0 ], [ 30.0, -30.0 ], [ 30.0, -40.0 ], [ 20.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -20.0, "LR_X": 30.0, "LR_Y": -30.0, "NAME": "20S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -30.0 ], [ 20.0, -20.0 ], [ 30.0, -20.0 ], [ 30.0, -30.0 ], [ 20.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": -10.0, "LR_X": 30.0, "LR_Y": -20.0, "NAME": "10S_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -20.0 ], [ 20.0, -10.0 ], [ 30.0, -10.0 ], [ 30.0, -20.0 ], [ 20.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 0.0, "LR_X": 30.0, "LR_Y": -10.0, "NAME": "00N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, -10.0 ], [ 20.0, 0.0 ], [ 30.0, 0.0 ], [ 30.0, -10.0 ], [ 20.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 10.0, "LR_X": 30.0, "LR_Y": 0.0, "NAME": "10N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 0.0 ], [ 20.0, 10.0 ], [ 30.0, 10.0 ], [ 30.0, 0.0 ], [ 20.0, 
0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 20.0, "LR_X": 30.0, "LR_Y": 10.0, "NAME": "20N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 10.0 ], [ 20.0, 20.0 ], [ 30.0, 20.0 ], [ 30.0, 10.0 ], [ 20.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 30.0, "LR_X": 30.0, "LR_Y": 20.0, "NAME": "30N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 20.0 ], [ 20.0, 30.0 ], [ 30.0, 30.0 ], [ 30.0, 20.0 ], [ 20.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 40.0, "LR_X": 30.0, "LR_Y": 30.0, "NAME": "40N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 30.0 ], [ 20.0, 40.0 ], [ 30.0, 40.0 ], [ 30.0, 30.0 ], [ 20.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 50.0, "LR_X": 30.0, "LR_Y": 40.0, "NAME": "50N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 40.0 ], [ 20.0, 50.0 ], [ 30.0, 50.0 ], [ 30.0, 40.0 ], [ 20.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 60.0, "LR_X": 30.0, "LR_Y": 50.0, "NAME": "60N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 50.0 ], [ 20.0, 60.0 ], [ 30.0, 60.0 ], [ 30.0, 50.0 ], [ 20.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 70.0, "LR_X": 30.0, "LR_Y": 60.0, "NAME": "70N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 60.0 ], [ 20.0, 70.0 ], [ 30.0, 70.0 ], [ 30.0, 60.0 ], [ 20.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 20.0, "UL_Y": 80.0, "LR_X": 30.0, "LR_Y": 70.0, "NAME": "80N_020E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 20.0, 70.0 ], [ 20.0, 80.0 ], [ 30.0, 80.0 ], [ 30.0, 70.0 ], [ 20.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -50.0, "LR_X": 40.0, "LR_Y": -60.0, "NAME": "50S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -60.0 ], [ 
30.0, -50.0 ], [ 40.0, -50.0 ], [ 40.0, -60.0 ], [ 30.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -40.0, "LR_X": 40.0, "LR_Y": -50.0, "NAME": "40S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -50.0 ], [ 30.0, -40.0 ], [ 40.0, -40.0 ], [ 40.0, -50.0 ], [ 30.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -30.0, "LR_X": 40.0, "LR_Y": -40.0, "NAME": "30S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -40.0 ], [ 30.0, -30.0 ], [ 40.0, -30.0 ], [ 40.0, -40.0 ], [ 30.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -20.0, "LR_X": 40.0, "LR_Y": -30.0, "NAME": "20S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -30.0 ], [ 30.0, -20.0 ], [ 40.0, -20.0 ], [ 40.0, -30.0 ], [ 30.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": -10.0, "LR_X": 40.0, "LR_Y": -20.0, "NAME": "10S_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -20.0 ], [ 30.0, -10.0 ], [ 40.0, -10.0 ], [ 40.0, -20.0 ], [ 30.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 0.0, "LR_X": 40.0, "LR_Y": -10.0, "NAME": "00N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, -10.0 ], [ 30.0, 0.0 ], [ 40.0, 0.0 ], [ 40.0, -10.0 ], [ 30.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 10.0, "LR_X": 40.0, "LR_Y": 0.0, "NAME": "10N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 0.0 ], [ 30.0, 10.0 ], [ 40.0, 10.0 ], [ 40.0, 0.0 ], [ 30.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 20.0, "LR_X": 40.0, "LR_Y": 10.0, "NAME": "20N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 10.0 ], [ 30.0, 20.0 ], [ 40.0, 20.0 ], [ 40.0, 10.0 ], [ 30.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 30.0, "LR_X": 40.0, "LR_Y": 20.0, "NAME": 
"30N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 20.0 ], [ 30.0, 30.0 ], [ 40.0, 30.0 ], [ 40.0, 20.0 ], [ 30.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 40.0, "LR_X": 40.0, "LR_Y": 30.0, "NAME": "40N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 30.0 ], [ 30.0, 40.0 ], [ 40.0, 40.0 ], [ 40.0, 30.0 ], [ 30.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 50.0, "LR_X": 40.0, "LR_Y": 40.0, "NAME": "50N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 40.0 ], [ 30.0, 50.0 ], [ 40.0, 50.0 ], [ 40.0, 40.0 ], [ 30.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 60.0, "LR_X": 40.0, "LR_Y": 50.0, "NAME": "60N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 50.0 ], [ 30.0, 60.0 ], [ 40.0, 60.0 ], [ 40.0, 50.0 ], [ 30.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 70.0, "LR_X": 40.0, "LR_Y": 60.0, "NAME": "70N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 60.0 ], [ 30.0, 70.0 ], [ 40.0, 70.0 ], [ 40.0, 60.0 ], [ 30.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 30.0, "UL_Y": 80.0, "LR_X": 40.0, "LR_Y": 70.0, "NAME": "80N_030E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 30.0, 70.0 ], [ 30.0, 80.0 ], [ 40.0, 80.0 ], [ 40.0, 70.0 ], [ 30.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -50.0, "LR_X": 50.0, "LR_Y": -60.0, "NAME": "50S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -60.0 ], [ 40.0, -50.0 ], [ 50.0, -50.0 ], [ 50.0, -60.0 ], [ 40.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -40.0, "LR_X": 50.0, "LR_Y": -50.0, "NAME": "40S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -50.0 ], [ 40.0, -40.0 ], [ 50.0, -40.0 ], [ 50.0, -50.0 ], [ 40.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { 
"UL_X": 40.0, "UL_Y": -30.0, "LR_X": 50.0, "LR_Y": -40.0, "NAME": "30S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -40.0 ], [ 40.0, -30.0 ], [ 50.0, -30.0 ], [ 50.0, -40.0 ], [ 40.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -20.0, "LR_X": 50.0, "LR_Y": -30.0, "NAME": "20S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -30.0 ], [ 40.0, -20.0 ], [ 50.0, -20.0 ], [ 50.0, -30.0 ], [ 40.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": -10.0, "LR_X": 50.0, "LR_Y": -20.0, "NAME": "10S_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -20.0 ], [ 40.0, -10.0 ], [ 50.0, -10.0 ], [ 50.0, -20.0 ], [ 40.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 0.0, "LR_X": 50.0, "LR_Y": -10.0, "NAME": "00N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, -10.0 ], [ 40.0, 0.0 ], [ 50.0, 0.0 ], [ 50.0, -10.0 ], [ 40.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 10.0, "LR_X": 50.0, "LR_Y": 0.0, "NAME": "10N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 0.0 ], [ 40.0, 10.0 ], [ 50.0, 10.0 ], [ 50.0, 0.0 ], [ 40.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 20.0, "LR_X": 50.0, "LR_Y": 10.0, "NAME": "20N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 10.0 ], [ 40.0, 20.0 ], [ 50.0, 20.0 ], [ 50.0, 10.0 ], [ 40.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 30.0, "LR_X": 50.0, "LR_Y": 20.0, "NAME": "30N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 20.0 ], [ 40.0, 30.0 ], [ 50.0, 30.0 ], [ 50.0, 20.0 ], [ 40.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 40.0, "LR_X": 50.0, "LR_Y": 30.0, "NAME": "40N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 30.0 ], [ 40.0, 40.0 ], [ 50.0, 40.0 ], [ 50.0, 
30.0 ], [ 40.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 50.0, "LR_X": 50.0, "LR_Y": 40.0, "NAME": "50N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 40.0 ], [ 40.0, 50.0 ], [ 50.0, 50.0 ], [ 50.0, 40.0 ], [ 40.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 60.0, "LR_X": 50.0, "LR_Y": 50.0, "NAME": "60N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 50.0 ], [ 40.0, 60.0 ], [ 50.0, 60.0 ], [ 50.0, 50.0 ], [ 40.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 70.0, "LR_X": 50.0, "LR_Y": 60.0, "NAME": "70N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 60.0 ], [ 40.0, 70.0 ], [ 50.0, 70.0 ], [ 50.0, 60.0 ], [ 40.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 40.0, "UL_Y": 80.0, "LR_X": 50.0, "LR_Y": 70.0, "NAME": "80N_040E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 40.0, 70.0 ], [ 40.0, 80.0 ], [ 50.0, 80.0 ], [ 50.0, 70.0 ], [ 40.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -50.0, "LR_X": 60.0, "LR_Y": -60.0, "NAME": "50S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -60.0 ], [ 50.0, -50.0 ], [ 60.0, -50.0 ], [ 60.0, -60.0 ], [ 50.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -40.0, "LR_X": 60.0, "LR_Y": -50.0, "NAME": "40S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -50.0 ], [ 50.0, -40.0 ], [ 60.0, -40.0 ], [ 60.0, -50.0 ], [ 50.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -30.0, "LR_X": 60.0, "LR_Y": -40.0, "NAME": "30S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -40.0 ], [ 50.0, -30.0 ], [ 60.0, -30.0 ], [ 60.0, -40.0 ], [ 50.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -20.0, "LR_X": 60.0, "LR_Y": -30.0, "NAME": "20S_050E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 50.0, -30.0 ], [ 50.0, -20.0 ], [ 60.0, -20.0 ], [ 60.0, -30.0 ], [ 50.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": -10.0, "LR_X": 60.0, "LR_Y": -20.0, "NAME": "10S_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -20.0 ], [ 50.0, -10.0 ], [ 60.0, -10.0 ], [ 60.0, -20.0 ], [ 50.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 0.0, "LR_X": 60.0, "LR_Y": -10.0, "NAME": "00N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, -10.0 ], [ 50.0, 0.0 ], [ 60.0, 0.0 ], [ 60.0, -10.0 ], [ 50.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 10.0, "LR_X": 60.0, "LR_Y": 0.0, "NAME": "10N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 0.0 ], [ 50.0, 10.0 ], [ 60.0, 10.0 ], [ 60.0, 0.0 ], [ 50.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 20.0, "LR_X": 60.0, "LR_Y": 10.0, "NAME": "20N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 10.0 ], [ 50.0, 20.0 ], [ 60.0, 20.0 ], [ 60.0, 10.0 ], [ 50.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 30.0, "LR_X": 60.0, "LR_Y": 20.0, "NAME": "30N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 20.0 ], [ 50.0, 30.0 ], [ 60.0, 30.0 ], [ 60.0, 20.0 ], [ 50.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 40.0, "LR_X": 60.0, "LR_Y": 30.0, "NAME": "40N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 30.0 ], [ 50.0, 40.0 ], [ 60.0, 40.0 ], [ 60.0, 30.0 ], [ 50.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 50.0, "LR_X": 60.0, "LR_Y": 40.0, "NAME": "50N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 40.0 ], [ 50.0, 50.0 ], [ 60.0, 50.0 ], [ 60.0, 40.0 ], [ 50.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 60.0, "LR_X": 60.0, "LR_Y": 
50.0, "NAME": "60N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 50.0 ], [ 50.0, 60.0 ], [ 60.0, 60.0 ], [ 60.0, 50.0 ], [ 50.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 70.0, "LR_X": 60.0, "LR_Y": 60.0, "NAME": "70N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 60.0 ], [ 50.0, 70.0 ], [ 60.0, 70.0 ], [ 60.0, 60.0 ], [ 50.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 50.0, "UL_Y": 80.0, "LR_X": 60.0, "LR_Y": 70.0, "NAME": "80N_050E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 50.0, 70.0 ], [ 50.0, 80.0 ], [ 60.0, 80.0 ], [ 60.0, 70.0 ], [ 50.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -50.0, "LR_X": 70.0, "LR_Y": -60.0, "NAME": "50S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -60.0 ], [ 60.0, -50.0 ], [ 70.0, -50.0 ], [ 70.0, -60.0 ], [ 60.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -40.0, "LR_X": 70.0, "LR_Y": -50.0, "NAME": "40S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -50.0 ], [ 60.0, -40.0 ], [ 70.0, -40.0 ], [ 70.0, -50.0 ], [ 60.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -30.0, "LR_X": 70.0, "LR_Y": -40.0, "NAME": "30S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -40.0 ], [ 60.0, -30.0 ], [ 70.0, -30.0 ], [ 70.0, -40.0 ], [ 60.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -20.0, "LR_X": 70.0, "LR_Y": -30.0, "NAME": "20S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -30.0 ], [ 60.0, -20.0 ], [ 70.0, -20.0 ], [ 70.0, -30.0 ], [ 60.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": -10.0, "LR_X": 70.0, "LR_Y": -20.0, "NAME": "10S_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -20.0 ], [ 60.0, -10.0 ], [ 70.0, -10.0 ], [ 70.0, -20.0 ], [ 60.0, -20.0 ] ] ] } }, +{ 
"type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 0.0, "LR_X": 70.0, "LR_Y": -10.0, "NAME": "00N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, -10.0 ], [ 60.0, 0.0 ], [ 70.0, 0.0 ], [ 70.0, -10.0 ], [ 60.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 10.0, "LR_X": 70.0, "LR_Y": 0.0, "NAME": "10N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 0.0 ], [ 60.0, 10.0 ], [ 70.0, 10.0 ], [ 70.0, 0.0 ], [ 60.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 20.0, "LR_X": 70.0, "LR_Y": 10.0, "NAME": "20N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 10.0 ], [ 60.0, 20.0 ], [ 70.0, 20.0 ], [ 70.0, 10.0 ], [ 60.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 30.0, "LR_X": 70.0, "LR_Y": 20.0, "NAME": "30N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 20.0 ], [ 60.0, 30.0 ], [ 70.0, 30.0 ], [ 70.0, 20.0 ], [ 60.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 40.0, "LR_X": 70.0, "LR_Y": 30.0, "NAME": "40N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 30.0 ], [ 60.0, 40.0 ], [ 70.0, 40.0 ], [ 70.0, 30.0 ], [ 60.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 50.0, "LR_X": 70.0, "LR_Y": 40.0, "NAME": "50N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 40.0 ], [ 60.0, 50.0 ], [ 70.0, 50.0 ], [ 70.0, 40.0 ], [ 60.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 60.0, "LR_X": 70.0, "LR_Y": 50.0, "NAME": "60N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 50.0 ], [ 60.0, 60.0 ], [ 70.0, 60.0 ], [ 70.0, 50.0 ], [ 60.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 70.0, "LR_X": 70.0, "LR_Y": 60.0, "NAME": "70N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 60.0 ], [ 60.0, 70.0 ], [ 70.0, 70.0 
], [ 70.0, 60.0 ], [ 60.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 60.0, "UL_Y": 80.0, "LR_X": 70.0, "LR_Y": 70.0, "NAME": "80N_060E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 60.0, 70.0 ], [ 60.0, 80.0 ], [ 70.0, 80.0 ], [ 70.0, 70.0 ], [ 60.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -50.0, "LR_X": 80.0, "LR_Y": -60.0, "NAME": "50S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -60.0 ], [ 70.0, -50.0 ], [ 80.0, -50.0 ], [ 80.0, -60.0 ], [ 70.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -40.0, "LR_X": 80.0, "LR_Y": -50.0, "NAME": "40S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -50.0 ], [ 70.0, -40.0 ], [ 80.0, -40.0 ], [ 80.0, -50.0 ], [ 70.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -30.0, "LR_X": 80.0, "LR_Y": -40.0, "NAME": "30S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -40.0 ], [ 70.0, -30.0 ], [ 80.0, -30.0 ], [ 80.0, -40.0 ], [ 70.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -20.0, "LR_X": 80.0, "LR_Y": -30.0, "NAME": "20S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -30.0 ], [ 70.0, -20.0 ], [ 80.0, -20.0 ], [ 80.0, -30.0 ], [ 70.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": -10.0, "LR_X": 80.0, "LR_Y": -20.0, "NAME": "10S_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -20.0 ], [ 70.0, -10.0 ], [ 80.0, -10.0 ], [ 80.0, -20.0 ], [ 70.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 0.0, "LR_X": 80.0, "LR_Y": -10.0, "NAME": "00N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, -10.0 ], [ 70.0, 0.0 ], [ 80.0, 0.0 ], [ 80.0, -10.0 ], [ 70.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 10.0, "LR_X": 80.0, "LR_Y": 0.0, "NAME": "10N_070E" }, "geometry": { 
"type": "Polygon", "coordinates": [ [ [ 70.0, 0.0 ], [ 70.0, 10.0 ], [ 80.0, 10.0 ], [ 80.0, 0.0 ], [ 70.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 20.0, "LR_X": 80.0, "LR_Y": 10.0, "NAME": "20N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 10.0 ], [ 70.0, 20.0 ], [ 80.0, 20.0 ], [ 80.0, 10.0 ], [ 70.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 30.0, "LR_X": 80.0, "LR_Y": 20.0, "NAME": "30N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 20.0 ], [ 70.0, 30.0 ], [ 80.0, 30.0 ], [ 80.0, 20.0 ], [ 70.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 40.0, "LR_X": 80.0, "LR_Y": 30.0, "NAME": "40N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 30.0 ], [ 70.0, 40.0 ], [ 80.0, 40.0 ], [ 80.0, 30.0 ], [ 70.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 50.0, "LR_X": 80.0, "LR_Y": 40.0, "NAME": "50N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 40.0 ], [ 70.0, 50.0 ], [ 80.0, 50.0 ], [ 80.0, 40.0 ], [ 70.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 60.0, "LR_X": 80.0, "LR_Y": 50.0, "NAME": "60N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 50.0 ], [ 70.0, 60.0 ], [ 80.0, 60.0 ], [ 80.0, 50.0 ], [ 70.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 70.0, "LR_X": 80.0, "LR_Y": 60.0, "NAME": "70N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 60.0 ], [ 70.0, 70.0 ], [ 80.0, 70.0 ], [ 80.0, 60.0 ], [ 70.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 70.0, "UL_Y": 80.0, "LR_X": 80.0, "LR_Y": 70.0, "NAME": "80N_070E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 70.0, 70.0 ], [ 70.0, 80.0 ], [ 80.0, 80.0 ], [ 80.0, 70.0 ], [ 70.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -50.0, "LR_X": 90.0, 
"LR_Y": -60.0, "NAME": "50S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -60.0 ], [ 80.0, -50.0 ], [ 90.0, -50.0 ], [ 90.0, -60.0 ], [ 80.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -40.0, "LR_X": 90.0, "LR_Y": -50.0, "NAME": "40S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -50.0 ], [ 80.0, -40.0 ], [ 90.0, -40.0 ], [ 90.0, -50.0 ], [ 80.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -30.0, "LR_X": 90.0, "LR_Y": -40.0, "NAME": "30S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -40.0 ], [ 80.0, -30.0 ], [ 90.0, -30.0 ], [ 90.0, -40.0 ], [ 80.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -20.0, "LR_X": 90.0, "LR_Y": -30.0, "NAME": "20S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -30.0 ], [ 80.0, -20.0 ], [ 90.0, -20.0 ], [ 90.0, -30.0 ], [ 80.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": -10.0, "LR_X": 90.0, "LR_Y": -20.0, "NAME": "10S_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -20.0 ], [ 80.0, -10.0 ], [ 90.0, -10.0 ], [ 90.0, -20.0 ], [ 80.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 0.0, "LR_X": 90.0, "LR_Y": -10.0, "NAME": "00N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, -10.0 ], [ 80.0, 0.0 ], [ 90.0, 0.0 ], [ 90.0, -10.0 ], [ 80.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 10.0, "LR_X": 90.0, "LR_Y": 0.0, "NAME": "10N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 0.0 ], [ 80.0, 10.0 ], [ 90.0, 10.0 ], [ 90.0, 0.0 ], [ 80.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 20.0, "LR_X": 90.0, "LR_Y": 10.0, "NAME": "20N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 10.0 ], [ 80.0, 20.0 ], [ 90.0, 20.0 ], [ 90.0, 10.0 ], [ 80.0, 10.0 ] ] ] } }, 
+{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 30.0, "LR_X": 90.0, "LR_Y": 20.0, "NAME": "30N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 20.0 ], [ 80.0, 30.0 ], [ 90.0, 30.0 ], [ 90.0, 20.0 ], [ 80.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 40.0, "LR_X": 90.0, "LR_Y": 30.0, "NAME": "40N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 30.0 ], [ 80.0, 40.0 ], [ 90.0, 40.0 ], [ 90.0, 30.0 ], [ 80.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 50.0, "LR_X": 90.0, "LR_Y": 40.0, "NAME": "50N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 40.0 ], [ 80.0, 50.0 ], [ 90.0, 50.0 ], [ 90.0, 40.0 ], [ 80.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 60.0, "LR_X": 90.0, "LR_Y": 50.0, "NAME": "60N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 50.0 ], [ 80.0, 60.0 ], [ 90.0, 60.0 ], [ 90.0, 50.0 ], [ 80.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 70.0, "LR_X": 90.0, "LR_Y": 60.0, "NAME": "70N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 60.0 ], [ 80.0, 70.0 ], [ 90.0, 70.0 ], [ 90.0, 60.0 ], [ 80.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 80.0, "UL_Y": 80.0, "LR_X": 90.0, "LR_Y": 70.0, "NAME": "80N_080E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 80.0, 70.0 ], [ 80.0, 80.0 ], [ 90.0, 80.0 ], [ 90.0, 70.0 ], [ 80.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -50.0, "LR_X": 100.0, "LR_Y": -60.0, "NAME": "50S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -60.0 ], [ 90.0, -50.0 ], [ 100.0, -50.0 ], [ 100.0, -60.0 ], [ 90.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -40.0, "LR_X": 100.0, "LR_Y": -50.0, "NAME": "40S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -50.0 ], [ 90.0, 
-40.0 ], [ 100.0, -40.0 ], [ 100.0, -50.0 ], [ 90.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -30.0, "LR_X": 100.0, "LR_Y": -40.0, "NAME": "30S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -40.0 ], [ 90.0, -30.0 ], [ 100.0, -30.0 ], [ 100.0, -40.0 ], [ 90.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -20.0, "LR_X": 100.0, "LR_Y": -30.0, "NAME": "20S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -30.0 ], [ 90.0, -20.0 ], [ 100.0, -20.0 ], [ 100.0, -30.0 ], [ 90.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": -10.0, "LR_X": 100.0, "LR_Y": -20.0, "NAME": "10S_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -20.0 ], [ 90.0, -10.0 ], [ 100.0, -10.0 ], [ 100.0, -20.0 ], [ 90.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 0.0, "LR_X": 100.0, "LR_Y": -10.0, "NAME": "00N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, -10.0 ], [ 90.0, 0.0 ], [ 100.0, 0.0 ], [ 100.0, -10.0 ], [ 90.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 10.0, "LR_X": 100.0, "LR_Y": 0.0, "NAME": "10N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 0.0 ], [ 90.0, 10.0 ], [ 100.0, 10.0 ], [ 100.0, 0.0 ], [ 90.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 20.0, "LR_X": 100.0, "LR_Y": 10.0, "NAME": "20N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 10.0 ], [ 90.0, 20.0 ], [ 100.0, 20.0 ], [ 100.0, 10.0 ], [ 90.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 30.0, "LR_X": 100.0, "LR_Y": 20.0, "NAME": "30N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 20.0 ], [ 90.0, 30.0 ], [ 100.0, 30.0 ], [ 100.0, 20.0 ], [ 90.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 40.0, "LR_X": 100.0, "LR_Y": 30.0, 
"NAME": "40N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 30.0 ], [ 90.0, 40.0 ], [ 100.0, 40.0 ], [ 100.0, 30.0 ], [ 90.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 50.0, "LR_X": 100.0, "LR_Y": 40.0, "NAME": "50N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 40.0 ], [ 90.0, 50.0 ], [ 100.0, 50.0 ], [ 100.0, 40.0 ], [ 90.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 60.0, "LR_X": 100.0, "LR_Y": 50.0, "NAME": "60N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 50.0 ], [ 90.0, 60.0 ], [ 100.0, 60.0 ], [ 100.0, 50.0 ], [ 90.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 70.0, "LR_X": 100.0, "LR_Y": 60.0, "NAME": "70N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 60.0 ], [ 90.0, 70.0 ], [ 100.0, 70.0 ], [ 100.0, 60.0 ], [ 90.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 90.0, "UL_Y": 80.0, "LR_X": 100.0, "LR_Y": 70.0, "NAME": "80N_090E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 90.0, 70.0 ], [ 90.0, 80.0 ], [ 100.0, 80.0 ], [ 100.0, 70.0 ], [ 90.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -50.0, "LR_X": 110.0, "LR_Y": -60.0, "NAME": "50S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -60.0 ], [ 100.0, -50.0 ], [ 110.0, -50.0 ], [ 110.0, -60.0 ], [ 100.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -40.0, "LR_X": 110.0, "LR_Y": -50.0, "NAME": "40S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -50.0 ], [ 100.0, -40.0 ], [ 110.0, -40.0 ], [ 110.0, -50.0 ], [ 100.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -30.0, "LR_X": 110.0, "LR_Y": -40.0, "NAME": "30S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -40.0 ], [ 100.0, -30.0 ], [ 110.0, -30.0 ], [ 110.0, -40.0 ], [ 100.0, -40.0 
] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -20.0, "LR_X": 110.0, "LR_Y": -30.0, "NAME": "20S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -30.0 ], [ 100.0, -20.0 ], [ 110.0, -20.0 ], [ 110.0, -30.0 ], [ 100.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": -10.0, "LR_X": 110.0, "LR_Y": -20.0, "NAME": "10S_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -20.0 ], [ 100.0, -10.0 ], [ 110.0, -10.0 ], [ 110.0, -20.0 ], [ 100.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 0.0, "LR_X": 110.0, "LR_Y": -10.0, "NAME": "00N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, -10.0 ], [ 100.0, 0.0 ], [ 110.0, 0.0 ], [ 110.0, -10.0 ], [ 100.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 10.0, "LR_X": 110.0, "LR_Y": 0.0, "NAME": "10N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 0.0 ], [ 100.0, 10.0 ], [ 110.0, 10.0 ], [ 110.0, 0.0 ], [ 100.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 20.0, "LR_X": 110.0, "LR_Y": 10.0, "NAME": "20N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 10.0 ], [ 100.0, 20.0 ], [ 110.0, 20.0 ], [ 110.0, 10.0 ], [ 100.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 30.0, "LR_X": 110.0, "LR_Y": 20.0, "NAME": "30N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 20.0 ], [ 100.0, 30.0 ], [ 110.0, 30.0 ], [ 110.0, 20.0 ], [ 100.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 40.0, "LR_X": 110.0, "LR_Y": 30.0, "NAME": "40N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 30.0 ], [ 100.0, 40.0 ], [ 110.0, 40.0 ], [ 110.0, 30.0 ], [ 100.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 50.0, "LR_X": 110.0, "LR_Y": 40.0, "NAME": "50N_100E" }, "geometry": { 
"type": "Polygon", "coordinates": [ [ [ 100.0, 40.0 ], [ 100.0, 50.0 ], [ 110.0, 50.0 ], [ 110.0, 40.0 ], [ 100.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 60.0, "LR_X": 110.0, "LR_Y": 50.0, "NAME": "60N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 50.0 ], [ 100.0, 60.0 ], [ 110.0, 60.0 ], [ 110.0, 50.0 ], [ 100.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 70.0, "LR_X": 110.0, "LR_Y": 60.0, "NAME": "70N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 60.0 ], [ 100.0, 70.0 ], [ 110.0, 70.0 ], [ 110.0, 60.0 ], [ 100.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 100.0, "UL_Y": 80.0, "LR_X": 110.0, "LR_Y": 70.0, "NAME": "80N_100E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 100.0, 70.0 ], [ 100.0, 80.0 ], [ 110.0, 80.0 ], [ 110.0, 70.0 ], [ 100.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -50.0, "LR_X": 120.0, "LR_Y": -60.0, "NAME": "50S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -60.0 ], [ 110.0, -50.0 ], [ 120.0, -50.0 ], [ 120.0, -60.0 ], [ 110.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -40.0, "LR_X": 120.0, "LR_Y": -50.0, "NAME": "40S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -50.0 ], [ 110.0, -40.0 ], [ 120.0, -40.0 ], [ 120.0, -50.0 ], [ 110.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -30.0, "LR_X": 120.0, "LR_Y": -40.0, "NAME": "30S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -40.0 ], [ 110.0, -30.0 ], [ 120.0, -30.0 ], [ 120.0, -40.0 ], [ 110.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -20.0, "LR_X": 120.0, "LR_Y": -30.0, "NAME": "20S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -30.0 ], [ 110.0, -20.0 ], [ 120.0, -20.0 ], [ 120.0, -30.0 ], [ 110.0, -30.0 ] ] ] } }, 
+{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": -10.0, "LR_X": 120.0, "LR_Y": -20.0, "NAME": "10S_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -20.0 ], [ 110.0, -10.0 ], [ 120.0, -10.0 ], [ 120.0, -20.0 ], [ 110.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 0.0, "LR_X": 120.0, "LR_Y": -10.0, "NAME": "00N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, -10.0 ], [ 110.0, 0.0 ], [ 120.0, 0.0 ], [ 120.0, -10.0 ], [ 110.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 10.0, "LR_X": 120.0, "LR_Y": 0.0, "NAME": "10N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 0.0 ], [ 110.0, 10.0 ], [ 120.0, 10.0 ], [ 120.0, 0.0 ], [ 110.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 20.0, "LR_X": 120.0, "LR_Y": 10.0, "NAME": "20N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 10.0 ], [ 110.0, 20.0 ], [ 120.0, 20.0 ], [ 120.0, 10.0 ], [ 110.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 30.0, "LR_X": 120.0, "LR_Y": 20.0, "NAME": "30N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 20.0 ], [ 110.0, 30.0 ], [ 120.0, 30.0 ], [ 120.0, 20.0 ], [ 110.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 40.0, "LR_X": 120.0, "LR_Y": 30.0, "NAME": "40N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 30.0 ], [ 110.0, 40.0 ], [ 120.0, 40.0 ], [ 120.0, 30.0 ], [ 110.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 50.0, "LR_X": 120.0, "LR_Y": 40.0, "NAME": "50N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 40.0 ], [ 110.0, 50.0 ], [ 120.0, 50.0 ], [ 120.0, 40.0 ], [ 110.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 60.0, "LR_X": 120.0, "LR_Y": 50.0, "NAME": "60N_110E" }, "geometry": { "type": "Polygon", 
"coordinates": [ [ [ 110.0, 50.0 ], [ 110.0, 60.0 ], [ 120.0, 60.0 ], [ 120.0, 50.0 ], [ 110.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 70.0, "LR_X": 120.0, "LR_Y": 60.0, "NAME": "70N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 60.0 ], [ 110.0, 70.0 ], [ 120.0, 70.0 ], [ 120.0, 60.0 ], [ 110.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 110.0, "UL_Y": 80.0, "LR_X": 120.0, "LR_Y": 70.0, "NAME": "80N_110E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 110.0, 70.0 ], [ 110.0, 80.0 ], [ 120.0, 80.0 ], [ 120.0, 70.0 ], [ 110.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -50.0, "LR_X": 130.0, "LR_Y": -60.0, "NAME": "50S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -60.0 ], [ 120.0, -50.0 ], [ 130.0, -50.0 ], [ 130.0, -60.0 ], [ 120.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -40.0, "LR_X": 130.0, "LR_Y": -50.0, "NAME": "40S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -50.0 ], [ 120.0, -40.0 ], [ 130.0, -40.0 ], [ 130.0, -50.0 ], [ 120.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -30.0, "LR_X": 130.0, "LR_Y": -40.0, "NAME": "30S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -40.0 ], [ 120.0, -30.0 ], [ 130.0, -30.0 ], [ 130.0, -40.0 ], [ 120.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -20.0, "LR_X": 130.0, "LR_Y": -30.0, "NAME": "20S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -30.0 ], [ 120.0, -20.0 ], [ 130.0, -20.0 ], [ 130.0, -30.0 ], [ 120.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": -10.0, "LR_X": 130.0, "LR_Y": -20.0, "NAME": "10S_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -20.0 ], [ 120.0, -10.0 ], [ 130.0, -10.0 ], [ 130.0, -20.0 ], [ 120.0, -20.0 ] ] ] } }, +{ "type": 
"Feature", "properties": { "UL_X": 120.0, "UL_Y": 0.0, "LR_X": 130.0, "LR_Y": -10.0, "NAME": "00N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, -10.0 ], [ 120.0, 0.0 ], [ 130.0, 0.0 ], [ 130.0, -10.0 ], [ 120.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 10.0, "LR_X": 130.0, "LR_Y": 0.0, "NAME": "10N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 0.0 ], [ 120.0, 10.0 ], [ 130.0, 10.0 ], [ 130.0, 0.0 ], [ 120.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 20.0, "LR_X": 130.0, "LR_Y": 10.0, "NAME": "20N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 10.0 ], [ 120.0, 20.0 ], [ 130.0, 20.0 ], [ 130.0, 10.0 ], [ 120.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 30.0, "LR_X": 130.0, "LR_Y": 20.0, "NAME": "30N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 20.0 ], [ 120.0, 30.0 ], [ 130.0, 30.0 ], [ 130.0, 20.0 ], [ 120.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 40.0, "LR_X": 130.0, "LR_Y": 30.0, "NAME": "40N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 30.0 ], [ 120.0, 40.0 ], [ 130.0, 40.0 ], [ 130.0, 30.0 ], [ 120.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 50.0, "LR_X": 130.0, "LR_Y": 40.0, "NAME": "50N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 40.0 ], [ 120.0, 50.0 ], [ 130.0, 50.0 ], [ 130.0, 40.0 ], [ 120.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 60.0, "LR_X": 130.0, "LR_Y": 50.0, "NAME": "60N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 50.0 ], [ 120.0, 60.0 ], [ 130.0, 60.0 ], [ 130.0, 50.0 ], [ 120.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 70.0, "LR_X": 130.0, "LR_Y": 60.0, "NAME": "70N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 
120.0, 60.0 ], [ 120.0, 70.0 ], [ 130.0, 70.0 ], [ 130.0, 60.0 ], [ 120.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 120.0, "UL_Y": 80.0, "LR_X": 130.0, "LR_Y": 70.0, "NAME": "80N_120E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 120.0, 70.0 ], [ 120.0, 80.0 ], [ 130.0, 80.0 ], [ 130.0, 70.0 ], [ 120.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -50.0, "LR_X": 140.0, "LR_Y": -60.0, "NAME": "50S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -60.0 ], [ 130.0, -50.0 ], [ 140.0, -50.0 ], [ 140.0, -60.0 ], [ 130.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -40.0, "LR_X": 140.0, "LR_Y": -50.0, "NAME": "40S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -50.0 ], [ 130.0, -40.0 ], [ 140.0, -40.0 ], [ 140.0, -50.0 ], [ 130.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -30.0, "LR_X": 140.0, "LR_Y": -40.0, "NAME": "30S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -40.0 ], [ 130.0, -30.0 ], [ 140.0, -30.0 ], [ 140.0, -40.0 ], [ 130.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -20.0, "LR_X": 140.0, "LR_Y": -30.0, "NAME": "20S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -30.0 ], [ 130.0, -20.0 ], [ 140.0, -20.0 ], [ 140.0, -30.0 ], [ 130.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": -10.0, "LR_X": 140.0, "LR_Y": -20.0, "NAME": "10S_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -20.0 ], [ 130.0, -10.0 ], [ 140.0, -10.0 ], [ 140.0, -20.0 ], [ 130.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 0.0, "LR_X": 140.0, "LR_Y": -10.0, "NAME": "00N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, -10.0 ], [ 130.0, 0.0 ], [ 140.0, 0.0 ], [ 140.0, -10.0 ], [ 130.0, -10.0 ] ] ] } }, +{ "type": "Feature", 
"properties": { "UL_X": 130.0, "UL_Y": 10.0, "LR_X": 140.0, "LR_Y": 0.0, "NAME": "10N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 0.0 ], [ 130.0, 10.0 ], [ 140.0, 10.0 ], [ 140.0, 0.0 ], [ 130.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 20.0, "LR_X": 140.0, "LR_Y": 10.0, "NAME": "20N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 10.0 ], [ 130.0, 20.0 ], [ 140.0, 20.0 ], [ 140.0, 10.0 ], [ 130.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 30.0, "LR_X": 140.0, "LR_Y": 20.0, "NAME": "30N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 20.0 ], [ 130.0, 30.0 ], [ 140.0, 30.0 ], [ 140.0, 20.0 ], [ 130.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 40.0, "LR_X": 140.0, "LR_Y": 30.0, "NAME": "40N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 30.0 ], [ 130.0, 40.0 ], [ 140.0, 40.0 ], [ 140.0, 30.0 ], [ 130.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 50.0, "LR_X": 140.0, "LR_Y": 40.0, "NAME": "50N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 40.0 ], [ 130.0, 50.0 ], [ 140.0, 50.0 ], [ 140.0, 40.0 ], [ 130.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 60.0, "LR_X": 140.0, "LR_Y": 50.0, "NAME": "60N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 50.0 ], [ 130.0, 60.0 ], [ 140.0, 60.0 ], [ 140.0, 50.0 ], [ 130.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 70.0, "LR_X": 140.0, "LR_Y": 60.0, "NAME": "70N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 60.0 ], [ 130.0, 70.0 ], [ 140.0, 70.0 ], [ 140.0, 60.0 ], [ 130.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 130.0, "UL_Y": 80.0, "LR_X": 140.0, "LR_Y": 70.0, "NAME": "80N_130E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 130.0, 70.0 
], [ 130.0, 80.0 ], [ 140.0, 80.0 ], [ 140.0, 70.0 ], [ 130.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -50.0, "LR_X": 150.0, "LR_Y": -60.0, "NAME": "50S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -60.0 ], [ 140.0, -50.0 ], [ 150.0, -50.0 ], [ 150.0, -60.0 ], [ 140.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -40.0, "LR_X": 150.0, "LR_Y": -50.0, "NAME": "40S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -50.0 ], [ 140.0, -40.0 ], [ 150.0, -40.0 ], [ 150.0, -50.0 ], [ 140.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -30.0, "LR_X": 150.0, "LR_Y": -40.0, "NAME": "30S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -40.0 ], [ 140.0, -30.0 ], [ 150.0, -30.0 ], [ 150.0, -40.0 ], [ 140.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -20.0, "LR_X": 150.0, "LR_Y": -30.0, "NAME": "20S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -30.0 ], [ 140.0, -20.0 ], [ 150.0, -20.0 ], [ 150.0, -30.0 ], [ 140.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": -10.0, "LR_X": 150.0, "LR_Y": -20.0, "NAME": "10S_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -20.0 ], [ 140.0, -10.0 ], [ 150.0, -10.0 ], [ 150.0, -20.0 ], [ 140.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 0.0, "LR_X": 150.0, "LR_Y": -10.0, "NAME": "00N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, -10.0 ], [ 140.0, 0.0 ], [ 150.0, 0.0 ], [ 150.0, -10.0 ], [ 140.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 10.0, "LR_X": 150.0, "LR_Y": 0.0, "NAME": "10N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 0.0 ], [ 140.0, 10.0 ], [ 150.0, 10.0 ], [ 150.0, 0.0 ], [ 140.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 
140.0, "UL_Y": 20.0, "LR_X": 150.0, "LR_Y": 10.0, "NAME": "20N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 10.0 ], [ 140.0, 20.0 ], [ 150.0, 20.0 ], [ 150.0, 10.0 ], [ 140.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 30.0, "LR_X": 150.0, "LR_Y": 20.0, "NAME": "30N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 20.0 ], [ 140.0, 30.0 ], [ 150.0, 30.0 ], [ 150.0, 20.0 ], [ 140.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 40.0, "LR_X": 150.0, "LR_Y": 30.0, "NAME": "40N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 30.0 ], [ 140.0, 40.0 ], [ 150.0, 40.0 ], [ 150.0, 30.0 ], [ 140.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 50.0, "LR_X": 150.0, "LR_Y": 40.0, "NAME": "50N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 40.0 ], [ 140.0, 50.0 ], [ 150.0, 50.0 ], [ 150.0, 40.0 ], [ 140.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 60.0, "LR_X": 150.0, "LR_Y": 50.0, "NAME": "60N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 50.0 ], [ 140.0, 60.0 ], [ 150.0, 60.0 ], [ 150.0, 50.0 ], [ 140.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 70.0, "LR_X": 150.0, "LR_Y": 60.0, "NAME": "70N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 60.0 ], [ 140.0, 70.0 ], [ 150.0, 70.0 ], [ 150.0, 60.0 ], [ 140.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 140.0, "UL_Y": 80.0, "LR_X": 150.0, "LR_Y": 70.0, "NAME": "80N_140E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 140.0, 70.0 ], [ 140.0, 80.0 ], [ 150.0, 80.0 ], [ 150.0, 70.0 ], [ 140.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -50.0, "LR_X": 160.0, "LR_Y": -60.0, "NAME": "50S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -60.0 ], [ 150.0, -50.0 
], [ 160.0, -50.0 ], [ 160.0, -60.0 ], [ 150.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -40.0, "LR_X": 160.0, "LR_Y": -50.0, "NAME": "40S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -50.0 ], [ 150.0, -40.0 ], [ 160.0, -40.0 ], [ 160.0, -50.0 ], [ 150.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -30.0, "LR_X": 160.0, "LR_Y": -40.0, "NAME": "30S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -40.0 ], [ 150.0, -30.0 ], [ 160.0, -30.0 ], [ 160.0, -40.0 ], [ 150.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -20.0, "LR_X": 160.0, "LR_Y": -30.0, "NAME": "20S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -30.0 ], [ 150.0, -20.0 ], [ 160.0, -20.0 ], [ 160.0, -30.0 ], [ 150.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": -10.0, "LR_X": 160.0, "LR_Y": -20.0, "NAME": "10S_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -20.0 ], [ 150.0, -10.0 ], [ 160.0, -10.0 ], [ 160.0, -20.0 ], [ 150.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 0.0, "LR_X": 160.0, "LR_Y": -10.0, "NAME": "00N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, -10.0 ], [ 150.0, 0.0 ], [ 160.0, 0.0 ], [ 160.0, -10.0 ], [ 150.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 10.0, "LR_X": 160.0, "LR_Y": 0.0, "NAME": "10N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 0.0 ], [ 150.0, 10.0 ], [ 160.0, 10.0 ], [ 160.0, 0.0 ], [ 150.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 20.0, "LR_X": 160.0, "LR_Y": 10.0, "NAME": "20N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 10.0 ], [ 150.0, 20.0 ], [ 160.0, 20.0 ], [ 160.0, 10.0 ], [ 150.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 30.0, 
"LR_X": 160.0, "LR_Y": 20.0, "NAME": "30N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 20.0 ], [ 150.0, 30.0 ], [ 160.0, 30.0 ], [ 160.0, 20.0 ], [ 150.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 40.0, "LR_X": 160.0, "LR_Y": 30.0, "NAME": "40N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 30.0 ], [ 150.0, 40.0 ], [ 160.0, 40.0 ], [ 160.0, 30.0 ], [ 150.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 50.0, "LR_X": 160.0, "LR_Y": 40.0, "NAME": "50N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 40.0 ], [ 150.0, 50.0 ], [ 160.0, 50.0 ], [ 160.0, 40.0 ], [ 150.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 60.0, "LR_X": 160.0, "LR_Y": 50.0, "NAME": "60N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 50.0 ], [ 150.0, 60.0 ], [ 160.0, 60.0 ], [ 160.0, 50.0 ], [ 150.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 70.0, "LR_X": 160.0, "LR_Y": 60.0, "NAME": "70N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 60.0 ], [ 150.0, 70.0 ], [ 160.0, 70.0 ], [ 160.0, 60.0 ], [ 150.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 150.0, "UL_Y": 80.0, "LR_X": 160.0, "LR_Y": 70.0, "NAME": "80N_150E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 150.0, 70.0 ], [ 150.0, 80.0 ], [ 160.0, 80.0 ], [ 160.0, 70.0 ], [ 150.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -50.0, "LR_X": 170.0, "LR_Y": -60.0, "NAME": "50S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -60.0 ], [ 160.0, -50.0 ], [ 170.0, -50.0 ], [ 170.0, -60.0 ], [ 160.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -40.0, "LR_X": 170.0, "LR_Y": -50.0, "NAME": "40S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -50.0 ], [ 160.0, -40.0 ], [ 170.0, 
-40.0 ], [ 170.0, -50.0 ], [ 160.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -30.0, "LR_X": 170.0, "LR_Y": -40.0, "NAME": "30S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -40.0 ], [ 160.0, -30.0 ], [ 170.0, -30.0 ], [ 170.0, -40.0 ], [ 160.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -20.0, "LR_X": 170.0, "LR_Y": -30.0, "NAME": "20S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -30.0 ], [ 160.0, -20.0 ], [ 170.0, -20.0 ], [ 170.0, -30.0 ], [ 160.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": -10.0, "LR_X": 170.0, "LR_Y": -20.0, "NAME": "10S_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -20.0 ], [ 160.0, -10.0 ], [ 170.0, -10.0 ], [ 170.0, -20.0 ], [ 160.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 0.0, "LR_X": 170.0, "LR_Y": -10.0, "NAME": "00N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, -10.0 ], [ 160.0, 0.0 ], [ 170.0, 0.0 ], [ 170.0, -10.0 ], [ 160.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 10.0, "LR_X": 170.0, "LR_Y": 0.0, "NAME": "10N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 0.0 ], [ 160.0, 10.0 ], [ 170.0, 10.0 ], [ 170.0, 0.0 ], [ 160.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 20.0, "LR_X": 170.0, "LR_Y": 10.0, "NAME": "20N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 10.0 ], [ 160.0, 20.0 ], [ 170.0, 20.0 ], [ 170.0, 10.0 ], [ 160.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 30.0, "LR_X": 170.0, "LR_Y": 20.0, "NAME": "30N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 20.0 ], [ 160.0, 30.0 ], [ 170.0, 30.0 ], [ 170.0, 20.0 ], [ 160.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 40.0, "LR_X": 170.0, 
"LR_Y": 30.0, "NAME": "40N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 30.0 ], [ 160.0, 40.0 ], [ 170.0, 40.0 ], [ 170.0, 30.0 ], [ 160.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 50.0, "LR_X": 170.0, "LR_Y": 40.0, "NAME": "50N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 40.0 ], [ 160.0, 50.0 ], [ 170.0, 50.0 ], [ 170.0, 40.0 ], [ 160.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 60.0, "LR_X": 170.0, "LR_Y": 50.0, "NAME": "60N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 50.0 ], [ 160.0, 60.0 ], [ 170.0, 60.0 ], [ 170.0, 50.0 ], [ 160.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 70.0, "LR_X": 170.0, "LR_Y": 60.0, "NAME": "70N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 60.0 ], [ 160.0, 70.0 ], [ 170.0, 70.0 ], [ 170.0, 60.0 ], [ 160.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 160.0, "UL_Y": 80.0, "LR_X": 170.0, "LR_Y": 70.0, "NAME": "80N_160E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 160.0, 70.0 ], [ 160.0, 80.0 ], [ 170.0, 80.0 ], [ 170.0, 70.0 ], [ 160.0, 70.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -50.0, "LR_X": 180.0, "LR_Y": -60.0, "NAME": "50S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -60.0 ], [ 170.0, -50.0 ], [ 180.0, -50.0 ], [ 180.0, -60.0 ], [ 170.0, -60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -40.0, "LR_X": 180.0, "LR_Y": -50.0, "NAME": "40S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -50.0 ], [ 170.0, -40.0 ], [ 180.0, -40.0 ], [ 180.0, -50.0 ], [ 170.0, -50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -30.0, "LR_X": 180.0, "LR_Y": -40.0, "NAME": "30S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -40.0 ], [ 170.0, -30.0 ], [ 180.0, -30.0 ], 
[ 180.0, -40.0 ], [ 170.0, -40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -20.0, "LR_X": 180.0, "LR_Y": -30.0, "NAME": "20S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -30.0 ], [ 170.0, -20.0 ], [ 180.0, -20.0 ], [ 180.0, -30.0 ], [ 170.0, -30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": -10.0, "LR_X": 180.0, "LR_Y": -20.0, "NAME": "10S_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -20.0 ], [ 170.0, -10.0 ], [ 180.0, -10.0 ], [ 180.0, -20.0 ], [ 170.0, -20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 0.0, "LR_X": 180.0, "LR_Y": -10.0, "NAME": "00N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, -10.0 ], [ 170.0, 0.0 ], [ 180.0, 0.0 ], [ 180.0, -10.0 ], [ 170.0, -10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 10.0, "LR_X": 180.0, "LR_Y": 0.0, "NAME": "10N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 0.0 ], [ 170.0, 10.0 ], [ 180.0, 10.0 ], [ 180.0, 0.0 ], [ 170.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 20.0, "LR_X": 180.0, "LR_Y": 10.0, "NAME": "20N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 10.0 ], [ 170.0, 20.0 ], [ 180.0, 20.0 ], [ 180.0, 10.0 ], [ 170.0, 10.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 30.0, "LR_X": 180.0, "LR_Y": 20.0, "NAME": "30N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 20.0 ], [ 170.0, 30.0 ], [ 180.0, 30.0 ], [ 180.0, 20.0 ], [ 170.0, 20.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 40.0, "LR_X": 180.0, "LR_Y": 30.0, "NAME": "40N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 30.0 ], [ 170.0, 40.0 ], [ 180.0, 40.0 ], [ 180.0, 30.0 ], [ 170.0, 30.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 50.0, "LR_X": 180.0, "LR_Y": 40.0, "NAME": 
"50N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 40.0 ], [ 170.0, 50.0 ], [ 180.0, 50.0 ], [ 180.0, 40.0 ], [ 170.0, 40.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 60.0, "LR_X": 180.0, "LR_Y": 50.0, "NAME": "60N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 50.0 ], [ 170.0, 60.0 ], [ 180.0, 60.0 ], [ 180.0, 50.0 ], [ 170.0, 50.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 70.0, "LR_X": 180.0, "LR_Y": 60.0, "NAME": "70N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 60.0 ], [ 170.0, 70.0 ], [ 180.0, 70.0 ], [ 180.0, 60.0 ], [ 170.0, 60.0 ] ] ] } }, +{ "type": "Feature", "properties": { "UL_X": 170.0, "UL_Y": 80.0, "LR_X": 180.0, "LR_Y": 70.0, "NAME": "80N_170E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 170.0, 70.0 ], [ 170.0, 80.0 ], [ 180.0, 80.0 ], [ 180.0, 70.0 ], [ 170.0, 70.0 ] ] ] } } +] +} diff --git a/op_resources/glad_tile_geometry/NOTICE.md b/op_resources/glad_tile_geometry/NOTICE.md new file mode 100644 index 00000000..60fcb08e --- /dev/null +++ b/op_resources/glad_tile_geometry/NOTICE.md @@ -0,0 +1,9 @@ +# NOTICE + +In this directory, the [`10d_tiles.geojson`](./10d_tiles.geojson) file was created using the +[10x10 degrees shape file](https://glad.umd.edu/users/Potapov/GLCLUC2020/10d_tiles.zip) +from the Global Land Cover and Land Use Change (GLAD) dataset. + +This worldwide dataset is freely accessible online and can be redistributed or +utilized without any restrictions, as long as the appropriate citation is given +in accordance with the Creative Commons Attribution License (CC BY). 
diff --git a/op_resources/sentinel_tile_geometry/NOTICE.md b/op_resources/sentinel_tile_geometry/NOTICE.md new file mode 100644 index 00000000..64eee1e3 --- /dev/null +++ b/op_resources/sentinel_tile_geometry/NOTICE.md @@ -0,0 +1,7 @@ +# NOTICE + +The kml file in this directory is Copernicus Sentinel data (2015) and was +published by the European Commission (Copernicus), ESA. + +Sentinel data is free, full and open for public use under EU law. For full details of use, refer to the +[Copernicus Sentinel Data Terms and Conditions](https://scihub.copernicus.eu/twiki/pub/SciHubWebPortal/TermsConditions/Sentinel_Data_Terms_and_Conditions.pdf) \ No newline at end of file diff --git a/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml b/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml new file mode 100644 index 00000000..1bacab76 --- /dev/null +++ b/op_resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcd58f1443f01f1c6f96baeef83a96239f86b50911729ed12195690ebc4de61 +size 108817408 diff --git a/op_resources/shadow_models/NOTICE.md b/op_resources/shadow_models/NOTICE.md new file mode 100644 index 00000000..33515751 --- /dev/null +++ b/op_resources/shadow_models/NOTICE.md @@ -0,0 +1,12 @@ +# NOTICE + +The model in this directory represents a fully convolutional model for +producing segmentation maps of cloud shadows in Sentinel-2 L2A imagery. 
+ +The L2A cloud shadow dataset was built by merging annotations from two cloud mask datasets that also +included cloud shadow annotations: + - [Sentinel-2 Cloud Mask Catalogue](https://zenodo.org/record/4172871) and + - [Sentinel-2 KappaZeta Cloud and Cloud Shadow Masks](https://zenodo.org/record/5095024) + +Both datasets are available under the License +[Creative Commons Attribution 4.0 International](https://creativecommons.org/licenses/by/4.0/legalcode). \ No newline at end of file diff --git a/op_resources/shadow_models/shadow.onnx b/op_resources/shadow_models/shadow.onnx new file mode 100644 index 00000000..ae6075d9 --- /dev/null +++ b/op_resources/shadow_models/shadow.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88fae46b0afb6a83ccc27862fee312f8a562925b051b5a6dce2b5a91c81008e9 +size 52302553 diff --git a/op_resources/spaceeye_models/spaceeye.onnx b/op_resources/spaceeye_models/spaceeye.onnx new file mode 100644 index 00000000..8ca3eb9a --- /dev/null +++ b/op_resources/spaceeye_models/spaceeye.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8447392326bdeb562b2bbbdf61b3faf5cd56541739773a6f0f37a139c7c06b +size 211010 diff --git a/op_resources/spectral_extension_model/spectral_extension.onnx b/op_resources/spectral_extension_model/spectral_extension.onnx new file mode 100644 index 00000000..863f0602 --- /dev/null +++ b/op_resources/spectral_extension_model/spectral_extension.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46de812e5a0996e9a381e81905a9bef9363456b95f4839b6f031dd3d2bc16012 +size 530041 diff --git a/ops/admag/admag_seasonal_field.yaml b/ops/admag/admag_seasonal_field.yaml new file mode 100644 index 00000000..42501c91 --- /dev/null +++ b/ops/admag/admag_seasonal_field.yaml @@ -0,0 +1,17 @@ +name: admag_seasonal_field +inputs: + admag_input: ADMAgSeasonalFieldInput +output: + seasonal_field: SeasonalFieldInformation +parameters: + base_url: + client_id: + 
client_secret: + authority: + default_scope: +entrypoint: + file: admag_seasonal_field_op.py + callback_builder: ADMAgConnector +version: 2 +description: + short_description: Establishes the connection with ADMAg and fetches seasonal field information. diff --git a/ops/admag/admag_seasonal_field_op.py b/ops/admag/admag_seasonal_field_op.py new file mode 100644 index 00000000..12c98100 --- /dev/null +++ b/ops/admag/admag_seasonal_field_op.py @@ -0,0 +1,314 @@ +from datetime import datetime +from typing import Any, Dict, Tuple + +from vibe_core.admag_client import ADMAgClient +from vibe_core.data import ( + ADMAgSeasonalFieldInput, + FertilizerInformation, + HarvestInformation, + OrganicAmendmentInformation, + SeasonalFieldInformation, + TillageInformation, + gen_guid, +) + +API_VERSION = "2023-11-01-preview" + + +class ADMAgConnector: + def __init__( + self, + base_url: str, + client_id: str, + client_secret: str, + authority: str, + default_scope: str, + ): + self.admag_client = ADMAgClient( + base_url=base_url, + api_version=API_VERSION, + client_id=client_id, + client_secret=client_secret, + authority=authority, + default_scope=default_scope, + ) + self.date_fmt = "%Y-%m-%dT%H:%M:%S%z" + + def get_field_entities( + self, admag_input: ADMAgSeasonalFieldInput + ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]: + seasonal_field_info: Dict[str, Any] = self.admag_client.get_seasonal_field( + admag_input.party_id, admag_input.seasonal_field_id + ) + + field_info = self.admag_client.get_field( + admag_input.party_id, seasonal_field_info["fieldId"] + ) + + season_info: Dict[str, Any] = self.admag_client.get_season(seasonal_field_info["seasonId"]) + + return ( + seasonal_field_info, + field_info, + season_info, + ) + + def get_harvests( + self, + party_id: str, + intersects_with_geometry: Dict[str, Any], + min_start_operation: str, + max_end_operation: str, + associated_resource: Dict[str, str], + ): + def check_harvest_properties(harvest: Dict[str, Any]) -> 
Dict[str, Any]: + if "gfsrt" not in harvest["properties"]: + raise ValueError( + "Harvest does not have gfsrt property. " + f"Please check harvest properties with id={harvest['id']} in Admag. " + "harvest['properties']['gfsrt'] = True, means the crop is grain." + ) + + if "strawStoverHayRemoval" not in harvest["properties"]: + raise ValueError( + "Harvest does not have strawStoverHayRemoval property " + f"for entity with id={harvest['id']}. " + "Please check harvest properties in Admag. " + "strawStoverHayRemoval is percentage of straw, " + "stover, and hay removed at harvest." + ) + + return harvest + + harvest_result = self.admag_client.get_harvest_info( + party_id, + intersects_with_geometry, + min_start_operation, + max_end_operation, + associated_resource, + ) + + [check_harvest_properties(harvest) for harvest in harvest_result["value"]] + + return [ + HarvestInformation( + is_grain=harvest["properties"]["gfsrt"] == "True", + start_date=harvest["operationStartDateTime"], + end_date=harvest["operationEndDateTime"], + crop_yield=harvest["totalYield"]["value"], + stray_stover_hay_removal=harvest["properties"]["strawStoverHayRemoval"], + ) + for harvest in harvest_result["value"] + ] + + def get_latest_harvest( + self, + operation_params: Dict[str, Any], + ) -> Dict[str, Any]: + harvest_result = self.admag_client.get_harvest_info(**operation_params) + harvests = harvest_result.get("value")  # a missing key is handled like an empty result + if not harvests: + raise ValueError(f"No harvest found with parameters: {operation_params}") + return max(harvests, key=lambda x: x["operationEndDateTime"]) + + def get_fertilizers( + self, + party_id: str, + intersects_with_geometry: Dict[str, Any], + min_start_operation: str, + max_end_operation: str, + associated_resource: Dict[str, str], + ): + def check_fertilizer_properties(fertilizer: Dict[str, Any]): + if "totalNitrogen" not in fertilizer["properties"]: + raise ValueError( + "Fertilizer does not have 
totalNitrogen property. " + f"Please check ADMAg application with id={fertilizer['id']}. " + "totalNitrogen is the total amount of nitrogen applied (lbs N/acre)." + ) + + if "eep" not in fertilizer["properties"]: + raise ValueError( + "Fertilizer does not have eep property. " + f"Please check ADMAg application with id={fertilizer['id']}. " + "eep is the enhanced efficiency phosphorus." + ) + + possible_eeps = ["None", "Slow Release", "Nitrification Inhibitor"] + if fertilizer["properties"]["eep"] not in possible_eeps: + raise ValueError( + f"eep property of ADMAg application with id={fertilizer['id']} " + "is not one of the allowed values. " + f"Allowed values are {possible_eeps}" + ) + + fertilizer_result = self.admag_client.get_fertilizer_info( + party_id, + intersects_with_geometry, + min_start_operation, + max_end_operation, + associated_resource, + ) + + [check_fertilizer_properties(fertilizer) for fertilizer in fertilizer_result["value"]] + + return [ + FertilizerInformation( + start_date=fertilizer["operationStartDateTime"], + end_date=fertilizer["operationEndDateTime"], + application_type=fertilizer["name"], + total_nitrogen=fertilizer["properties"]["totalNitrogen"], + enhanced_efficiency_phosphorus=fertilizer["properties"]["eep"], + ) + for fertilizer in fertilizer_result["value"] + ] + + def get_first_planting( + self, + operation_params: Dict[str, Any], + ): + operation_result = self.admag_client.get_planting_info(**operation_params) + plantings = operation_result.get("value")  # a missing key is handled like an empty result + if not plantings: + raise ValueError(f"No planting found with parameters: {operation_params}") + obj_start = min(plantings, key=lambda x: x["operationStartDateTime"]) + return obj_start["operationStartDateTime"] + + def get_tillages( + self, + party_id: str, + intersects_with_geometry: Dict[str, Any], + min_start_operation: str, + max_end_operation: str, + associated_resource: Dict[str, str], + ): + tillage_result = self.admag_client.get_tillage_info( 
+ party_id, + intersects_with_geometry, + min_start_operation, + max_end_operation, + associated_resource, + ) + + return [ + TillageInformation( + implement=tilage["name"], + start_date=tilage["operationStartDateTime"], + end_date=tilage["operationEndDateTime"], + ) + for tilage in tillage_result["value"] + ] + + def get_organic_amendments( + self, + party_id: str, + intersects_with_geometry: Dict[str, Any], + min_start_operation: str, + max_end_operation: str, + associated_resource: Dict[str, str], + ): + def check_organic_amendment_properties(organic_amendments: Dict[str, Any]): + if "type" not in organic_amendments["properties"]: + raise ValueError( + "Organic amendment does not have type property. " + f"Please check ADMAg application with id={organic_amendments['id']}. " + "Type is the type of organic amendment. Check Comet-Farm API documentation " + "for the list of allowed values." + ) + + if "amount" not in organic_amendments["properties"]: + raise ValueError( + "Organic amendment does not have amount property. " + f"Please check ADMAg application with id={organic_amendments['id']}. " + "Amount is the amount of organic amendment applied (tons/acre)." + ) + + if "percentN" not in organic_amendments["properties"]: + raise ValueError( + "Organic amendment does not have percentN property. " + f"Please check ADMAg application with id={organic_amendments['id']}. " + "percentN is the percent nitrogen in the organic amendment." + ) + + if "CNratio" not in organic_amendments["properties"]: + raise ValueError( + "Organic amendment does not have CNratio property. " + f"Please check ADMAg application with id={organic_amendments['id']}. " + "CNratio is the carbon nitrogen ratio of the organic amendment." 
+ ) + + omad_result = self.admag_client.get_organic_amendments_info( + party_id, + intersects_with_geometry, + min_start_operation, + max_end_operation, + associated_resource, + ) + + [ + check_organic_amendment_properties(organic_amendment) + for organic_amendment in omad_result["value"] + ] + + return [ + OrganicAmendmentInformation( + start_date=omad["operationStartDateTime"], + end_date=omad["operationEndDateTime"], + organic_amendment_type=omad["properties"]["type"], + organic_amendment_amount=omad["properties"]["amount"], + organic_amendment_percent_nitrogen=omad["properties"]["percentN"], + organic_amendment_carbon_nitrogen_ratio=omad["properties"]["CNratio"], + ) + for omad in omad_result["value"] + ] + + def get_season_field_data( + self, + party_id: str, + seasonal_field_info: Dict[str, Any], + season_info: Dict[str, Any], + field_info: Dict[str, Any], + ) -> SeasonalFieldInformation: + associated_resource = {"type": "SeasonalField", "id": seasonal_field_info["id"]} + + operation_params = { + "party_id": party_id, + "intersects_with_geometry": seasonal_field_info["geometry"], + "min_start_operation": season_info["startDateTime"], + "max_end_operation": season_info["endDateTime"], + "associated_resource": associated_resource, + } + + latest_harvest = self.get_latest_harvest(operation_params) + + planting_start_time = self.get_first_planting(operation_params) + + return SeasonalFieldInformation( + id=gen_guid(), + time_range=( + datetime.strptime(planting_start_time, self.date_fmt), + datetime.strptime(latest_harvest["operationEndDateTime"], self.date_fmt), + ), + geometry=seasonal_field_info["geometry"], + assets=[], + crop_name=seasonal_field_info["name"], + crop_type=seasonal_field_info["description"], + fertilizers=self.get_fertilizers(**operation_params), + harvests=self.get_harvests(**operation_params), + tillages=self.get_tillages(**operation_params), + organic_amendments=self.get_organic_amendments(**operation_params), + 
properties=field_info["properties"], + ) + + def __call__(self): + def get_admag_seasonal_field( + admag_input: ADMAgSeasonalFieldInput, + ) -> Dict[str, SeasonalFieldInformation]: + seasonal_field_info, field_info, season_info = self.get_field_entities(admag_input) + seasonal_field = self.get_season_field_data( + admag_input.party_id, seasonal_field_info, season_info, field_info + ) + + return {"seasonal_field": seasonal_field} + + return get_admag_seasonal_field diff --git a/ops/admag/get_prescription.py b/ops/admag/get_prescription.py new file mode 100644 index 00000000..4b4c9101 --- /dev/null +++ b/ops/admag/get_prescription.py @@ -0,0 +1,43 @@ +from typing import Dict + +from vibe_core.admag_client import ADMAgClient +from vibe_core.data import ADMAgPrescription, ADMAgPrescriptionInput + +API_VERSION = "2023-11-01-preview" + + +class CallbackBuilder: + def __init__( + self, + base_url: str, + client_id: str, + client_secret: str, + authority: str, + default_scope: str, + ): + self.admag_client = ADMAgClient( + base_url=base_url, + api_version=API_VERSION, + client_id=client_id, + client_secret=client_secret, + authority=authority, + default_scope=default_scope, + ) + + def prescriptions(self, user_input: ADMAgPrescriptionInput) -> ADMAgPrescription: + response = self.admag_client.get_prescription( + user_input.party_id, user_input.prescription_id + ) + + prescription = ADMAgPrescription(**response) + + return prescription + + def __call__(self): + def prescriptions_init( + prescription_without_geom_input: ADMAgPrescriptionInput, + ) -> Dict[str, ADMAgPrescription]: + out_prescriptions = self.prescriptions(prescription_without_geom_input) + return {"prescription_with_geom": out_prescriptions} + + return prescriptions_init diff --git a/ops/admag/get_prescription.yaml b/ops/admag/get_prescription.yaml new file mode 100644 index 00000000..64e3affe --- /dev/null +++ b/ops/admag/get_prescription.yaml @@ -0,0 +1,17 @@ +name: get_prescription +inputs: + 
prescription_without_geom_input: ADMAgPrescriptionInput +output: + prescription_with_geom: ADMAgPrescription +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: +entrypoint: + file: get_prescription.py + callback_builder: CallbackBuilder +description: + short_description: + Get prescription using ADMAg API. \ No newline at end of file diff --git a/ops/admag/list_prescriptions.py b/ops/admag/list_prescriptions.py new file mode 100644 index 00000000..d88ed8f0 --- /dev/null +++ b/ops/admag/list_prescriptions.py @@ -0,0 +1,94 @@ +from typing import Any, Dict, List, Tuple + +from vibe_core.admag_client import ADMAgClient +from vibe_core.data import ADMAgPrescriptionInput, ADMAgSeasonalFieldInput + +API_VERSION = "2023-11-01-preview" + + +class CallbackBuilder: + def __init__( + self, + base_url: str, + client_id: str, + client_secret: str, + authority: str, + default_scope: str, + ): + self.admag_client = ADMAgClient( + base_url=base_url, + api_version=API_VERSION, + client_id=client_id, + client_secret=client_secret, + authority=authority, + default_scope=default_scope, + ) + + def get_prescriptions( + self, party_id: str, field_info: Dict[str, str], intersect_geometry: Dict[str, Any] + ) -> List[ADMAgPrescriptionInput]: + response = self.admag_client.get_prescription_map_id( + party_id=party_id, + field_id=field_info["fieldId"], + crop_id=field_info["cropId"], + ) + + prescription_map_id = None + for p_map in response["value"]: + if "properties" in p_map and "seasonal_field_id" in p_map["properties"]: + if p_map["properties"]["seasonal_field_id"] == field_info["seasonal_field_id"]: + prescription_map_id = p_map["id"] + break + + if not prescription_map_id: + raise ValueError("Prescription map not found") + + response = self.admag_client.get_prescriptions( + party_id, prescription_map_id, geometry=intersect_geometry + ) + + prescriptions = [] + + for value in response["value"]: + prescriptions.append( + ADMAgPrescriptionInput( + 
prescription_id=value["id"], + party_id=value["partyId"], + ) + ) + + return prescriptions + + def get_field_info( + self, party_id: str, seasonal_field_id: str + ) -> Tuple[Dict[str, str], Dict[str, Any]]: + response = self.admag_client.get_seasonal_field(party_id, seasonal_field_id) + field_info = { + "fieldId": response["fieldId"], + "cropId": response["cropId"], + "seasonId": response["seasonId"], + "createdDateTime": response["createdDateTime"], + "modifiedDateTime": response["modifiedDateTime"], + "seasonal_field_id": seasonal_field_id, + } + geometry = response["geometry"] + return field_info, geometry + + def prescriptions(self, user_input: ADMAgSeasonalFieldInput) -> List[ADMAgPrescriptionInput]: + field_info, geometry = self.get_field_info( + user_input.party_id, user_input.seasonal_field_id + ) + + list_prescriptions = self.get_prescriptions( + user_input.party_id, field_info, intersect_geometry=geometry + ) + return list_prescriptions + + def __call__(self): + def prescriptions_init( + admag_input: ADMAgSeasonalFieldInput, + ) -> Dict[str, List[ADMAgPrescriptionInput]]: + out_prescriptions = self.prescriptions(admag_input) + return {"prescriptions": out_prescriptions} + + return prescriptions_init diff --git a/ops/admag/list_prescriptions.yaml b/ops/admag/list_prescriptions.yaml new file mode 100644 index 00000000..dfc33027 --- /dev/null +++ b/ops/admag/list_prescriptions.yaml @@ -0,0 +1,16 @@ +name: get_prescription_map +inputs: + admag_input: ADMAgSeasonalFieldInput +output: + prescriptions: List[ADMAgPrescriptionInput] +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: +entrypoint: + callback_builder: CallbackBuilder + file: list_prescriptions.py +description: + short_description: List available prescriptions using prescription map. 
\ No newline at end of file diff --git a/ops/admag/prescriptions.py b/ops/admag/prescriptions.py new file mode 100644 index 00000000..06de40a3 --- /dev/null +++ b/ops/admag/prescriptions.py @@ -0,0 +1,107 @@ +from datetime import datetime +from tempfile import TemporaryDirectory +from typing import Any, Dict, List, Tuple + +import pandas as pd +from geopandas.geodataframe import GeoDataFrame +from shapely.geometry import shape + +from vibe_core.admag_client import ADMAgClient +from vibe_core.data import ( + ADMAgPrescription, + ADMAgSeasonalFieldInput, + AssetVibe, + GeometryCollection, + gen_guid, + gen_hash_id, +) + +API_VERSION = "2023-11-01-preview" +DATE_FORMAT = "%Y-%m-%dT%H:%M:%S%z" + + +class CallbackBuilder: + def __init__( + self, + base_url: str, + client_id: str, + client_secret: str, + authority: str, + default_scope: str, + ): + self.temp_dir = TemporaryDirectory() + + self.admag_client = ADMAgClient( + base_url=base_url, + api_version=API_VERSION, + client_id=client_id, + client_secret=client_secret, + authority=authority, + default_scope=default_scope, + ) + + def get_prescriptions(self, prescriptions: List[ADMAgPrescription]) -> AssetVibe: + if not prescriptions: + raise ValueError("No prescriptions found") + + measures = [item.measurements for item in prescriptions] + geometry = [shape(item.geometry) for item in prescriptions] + df = pd.DataFrame(measures) + + for column in df.columns: + df[column] = df[column].apply(lambda x: x["value"]) # type: ignore + + df["geometry"] = geometry + + df = GeoDataFrame(data=df, geometry="geometry") # type: ignore + out_path = f"{self.temp_dir.name}/prescription.geojson" + df.to_file(out_path, driver="GeoJSON") + asset_vibe = AssetVibe(reference=out_path, type="application/json", id=gen_guid()) + return asset_vibe + + def get_field_info( + self, party_id: str, seasonal_field_id: str + ) -> Tuple[Dict[str, str], Dict[str, Any]]: + response = self.admag_client.get_seasonal_field(party_id, seasonal_field_id) + 
field_info = { + "fieldId": response["fieldId"], + "cropId": response["cropId"], + "seasonId": response["seasonId"], + "createdDateTime": response["createdDateTime"], + "modifiedDateTime": response["modifiedDateTime"], + } + geometry = response["geometry"] + return field_info, geometry + + def prescriptions( + self, user_input: ADMAgSeasonalFieldInput, prescriptions: List[ADMAgPrescription] + ) -> GeometryCollection: + field_info, geometry = self.get_field_info( + user_input.party_id, user_input.seasonal_field_id + ) + asset_vibe = self.get_prescriptions(prescriptions) + + time_range = ( + datetime.strptime(prescriptions[0].createdDateTime, DATE_FORMAT), + datetime.strptime(prescriptions[0].modifiedDateTime, DATE_FORMAT), + ) + return GeometryCollection( + id=gen_hash_id("heatmap_nutrients", geometry, time_range), + time_range=time_range, + geometry=geometry, + assets=[asset_vibe], + ) + + def __call__(self): + def prescriptions_init( + admag_input: ADMAgSeasonalFieldInput, + prescriptions_with_geom_input: List[ADMAgPrescription], + ) -> Dict[str, GeometryCollection]: + out_prescriptions = self.prescriptions(admag_input, prescriptions_with_geom_input) + return {"response": out_prescriptions} + + return prescriptions_init + + def __del__(self): + if self.temp_dir: + self.temp_dir.cleanup() diff --git a/ops/admag/prescriptions.yaml b/ops/admag/prescriptions.yaml new file mode 100644 index 00000000..b3f224ba --- /dev/null +++ b/ops/admag/prescriptions.yaml @@ -0,0 +1,18 @@ +name: prescriptions +inputs: + admag_input: ADMAgSeasonalFieldInput + prescriptions_with_geom_input: List[ADMAgPrescription] +output: + response: GeometryCollection +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: +entrypoint: + file: prescriptions.py + callback_builder: CallbackBuilder +version: 2 +description: + short_description: Downloads boundary and prescriptions linked to seasonal field from ADMAg data source. 
diff --git a/ops/admag/test_admag.py b/ops/admag/test_admag.py new file mode 100644 index 00000000..f21357a1 --- /dev/null +++ b/ops/admag/test_admag.py @@ -0,0 +1,1010 @@ +import copy +import json +import os +from typing import Any, Dict, List, cast +from unittest.mock import MagicMock, Mock, patch + +import geopandas as gpd +import pytest +from shapely import geometry as shpg + +from vibe_core.admag_client import ADMAgClient +from vibe_core.data import ( + ADMAgPrescription, + ADMAgPrescriptionInput, + ADMAgSeasonalFieldInput, + AssetVibe, +) +from vibe_dev.mock_utils import Request +from vibe_dev.testing.op_tester import OpTester + +HERE = os.path.dirname(os.path.abspath(__file__)) +ADMAG_SEASONAL_FIELD_OP = os.path.join(HERE, "admag_seasonal_field.yaml") + + +@pytest.fixture +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +def admag_client(get_token: MagicMock): + return ADMAgClient( + base_url="fake_url", + api_version="fake_admag_version", + client_id="fake_client_id", + client_secret="fake_client_secret", + authority="fake_authority", + default_scope="fake_scope", + ) + + +@pytest.fixture +def fake_get_response_without_next_link() -> Dict[str, Any]: + return { + "value": [ + { + "fake_key": "fake_value", + }, + ], + } + + +@pytest.fixture +def fake_get_response_with_next_link() -> Dict[str, Any]: + return { + "value": [ + { + "fake_key": "fake_value", + }, + ], + "nextLink": "http://fake-url", + } + + +@pytest.fixture +def fake_input_data() -> ADMAgSeasonalFieldInput: + return ADMAgSeasonalFieldInput( + party_id="fake-party-id", + seasonal_field_id="fake-seasonal-field-id", + ) + + +@pytest.fixture +def fake_prescription_input_data() -> ADMAgPrescriptionInput: + return ADMAgPrescriptionInput( + party_id="fake-party-id", + prescription_id="fake-prescription-id", + ) + + +@patch.object(ADMAgClient, "_request") +def test_admag_client_get_limit_requests( + _request: MagicMock, + monkeypatch: pytest.MonkeyPatch, + 
admag_client: ADMAgClient, + fake_get_response_with_next_link: Dict[str, Any], + fake_get_response_without_next_link: Dict[str, Any], +): + fake_response_different_link = fake_get_response_with_next_link.copy() + fake_response_different_link.update({"nextLink": "different_fake_link"}) + fake_response_another_link = fake_get_response_with_next_link.copy() + fake_response_another_link.update({"nextLink": "another_fake_link"}) + + monkeypatch.setattr(ADMAgClient, "NEXT_PAGES_LIMIT", 1) + _request.side_effect = [ + fake_get_response_with_next_link, + fake_response_different_link, + fake_get_response_without_next_link, + ] + + with pytest.raises(RuntimeError): + admag_client._get("fake_url") + + +@patch.object(ADMAgClient, "_request") +def test_admag_client_get_repeated_link( + _request: MagicMock, + admag_client: ADMAgClient, + fake_get_response_with_next_link: Dict[str, Any], + fake_get_response_without_next_link: Dict[str, Any], +): + _request.side_effect = [ + fake_get_response_with_next_link, + fake_get_response_with_next_link, + fake_get_response_without_next_link, + ] + + with pytest.raises(RuntimeError): + admag_client._get("fake_url") + + +@patch.object(ADMAgClient, "_request") +def test_admag_client_get_follow_link( + _request: MagicMock, + admag_client: ADMAgClient, + fake_get_response_with_next_link: Dict[str, Any], + fake_get_response_without_next_link: Dict[str, Any], +): + fake_response_different_link = fake_get_response_with_next_link.copy() + fake_response_different_link.update({"nextLink": "different_fake_link"}) + _request.side_effect = [ + fake_get_response_with_next_link, + fake_response_different_link, + fake_get_response_without_next_link, + ] + + result = admag_client._get("fake_url") + assert len(result["value"]) == 3 + + +def test_admag_client_creation(admag_client: ADMAgClient): + assert admag_client.header() == { + "Authorization": "Bearer my_fake_token", + "Content-Type": "application/merge-patch+json", + } + + +@pytest.fixture +def 
seasonal_field_info(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "partyId": "fake-party-id", + "farmId": "fake-farm-id", + "fieldId": "fake-field-id", + "seasonId": "fake-season-id", + "cropId": "fake-crop-id", + "id": "fake-seasonal-field-id", + "eTag": "fake-etag", + "status": "Active", + "createdDateTime": "2001-01-01T00:00:00Z", + "modifiedDateTime": "2001-01-01T00:00:00Z", + "name": "fake-seasonal-field-name", + "description": "fake-description", + "geometry": vibe_geometry_dict, + "properties": { + "plantingDateTime": "2001-01-01T00:00:00Z", + }, + } + + +@patch("vibe_core.admag_client.ADMAgClient._get") +def test_get_seasonal_field( + _get: MagicMock, seasonal_field_info: Dict[str, Any], admag_client: ADMAgClient +): + _get.return_value = seasonal_field_info + seasonal_field_result = admag_client.get_seasonal_field( + party_id="fake-party-id", + seasonal_field_id="fake-seasonal-field-id", + ) + assert seasonal_field_result + assert "name" in seasonal_field_result + assert "description" in seasonal_field_result + assert "geometry" in seasonal_field_result + + +@pytest.fixture +def season_info() -> Dict[str, Any]: + return { + "startDateTime": "2001-01-01T00:00:00Z", + "endDateTime": "2001-12-31T00:00:00Z", + "year": 2001, + "id": "fake-season-id", + "eTag": "fake-etag", + "status": "Active", + "createdDateTime": "2001-01-01T00:00:00Z", + "modifiedDateTime": "2001-01-01T00:00:00Z", + "name": "fake-season-name", + } + + +@patch("vibe_core.admag_client.ADMAgClient._get") +def test_get_season(_get: MagicMock, season_info: Dict[str, Any], admag_client: ADMAgClient): + _get.return_value = season_info + season_result = admag_client.get_season( + season_id="fake-season-id", + ) + assert season_result + assert "startDateTime" in season_result + assert "endDateTime" in season_result + assert "year" in season_result + + +@pytest.fixture +def field_info(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "partyId": "fake-party-id", 
+ "farmId": "fake-farm-id", + "geometry": vibe_geometry_dict, + "eTag": "fake-etag", + "id": "fake-field-id", + "status": "Active", + "createdDateTime": "2001-01-01T00:00:00Z", + "modifiedDateTime": "2001-01-01T00:00:00Z", + "name": "fake-field-name", + "description": "Fake description", + "properties": { + "pre_1980": "Lowland Non-Irrigate...Pre 1980s)", + "crp_type": "None", + "crp_start": "", + "crp_end": "", + "year_1980_2000": "Irrigated: Continuous Hay", + "year_1980_2000_tillage": "Intensive Tillage", + }, + } + + +@pytest.fixture +def prescription_geom_input() -> List[ADMAgPrescription]: + prescription = { + "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", + "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", + "productCode": "1635", + "productName": "Nutrient", + "type": "Nutrient", + "measurements": { + "N": {"value": 47.1}, + "P": {"value": 34.99769206227461}, + "pH": {"value": 4.978131831743143}, + "C": {"value": 0.046408031802193}, + }, + "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", + "eTag": "24009696-0000-0100-0000-65fb20540000", + "status": "Active", + "createdDateTime": "2024-03-20T17:43:48Z", + "modifiedDateTime": "2024-03-20T17:43:48Z", + "source": "IOT device", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-117.03642546099948, 47.044663835752566], + [-117.05642546099949, 47.044663835752566], + [-117.05642546099949, 47.02466383575257], + [-117.03642546099948, 47.02466383575257], + [-117.03642546099948, 47.044663835752566], + ] + ], + }, + "name": "Nitrogen Nutrient", + "description": "", + "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", + "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", + } + + return [ADMAgPrescription(**prescription)] + + +@patch("vibe_core.admag_client.ADMAgClient._get") +def test_get_field(_get: MagicMock, field_info: Dict[str, Any], admag_client: ADMAgClient): + _get.return_value = field_info + field_result = admag_client.get_field( + party_id="fake-party-id", + 
field_id="fake-field-id", + ) + assert field_result + assert "properties" in field_result + properties = field_result["properties"] + assert "pre_1980" in properties + assert "crp_type" in properties + assert "crp_start" in properties + assert "crp_end" in properties + assert "year_1980_2000" in properties + assert "year_1980_2000_tillage" in properties + + +@pytest.fixture +def harvest_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "value": [ + { + "geometry": vibe_geometry_dict, + "attachmentsLink": "https://fake-attachment.bla", + "createdDateTime": "2021-12-10T00:18:33Z", + "eTag": "5500c45e-0000-0100-0000-61b29cd90000", + "partyId": "fake-party-id", + "id": "fake-harvest-id", + "modifiedDateTime": "2021-12-10T00:18:33Z", + "operationEndDateTime": "2001-09-05T00:00:00Z", + "operationStartDateTime": "2001-09-05T00:00:00Z", + "properties": {"gfsrt": "True", "strawStoverHayRemoval": "0"}, + "source": "Farming", + "status": "Active", + "totalYield": {"unit": "tons", "value": 39.0}, + }, + ] + } + + +@pytest.fixture +def planting_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "value": [ + { + "partyId": "fake-party-id", + "id": "fake-id", + "source": "Manual", + "name": "Planting data for North Farm", + "description": "some description", + "status": "Active", + "operationStartDateTime": "2021-02-25T16:57:04Z", + "operationEndDateTime": "2021-02-27T10:13:06Z", + "operationModifiedDateTime": "2021-02-28T10:14:12Z", + "avgPlantingRate": {"unit": "seedsperacre", "value": 30}, + "area": {"unit": "acre", "value": 30}, + "totalMaterial": {"unit": "seeds", "value": 758814}, + "avgMaterial": {"unit": "seedsperacre", "value": 25293}, + "plantingProductDetails": [ + { + "productName": "VAR1", + "area": {"unit": "acre", "value": 20}, + "totalMaterial": {"unit": "seeds", "value": 389214}, + "avgMaterial": {"unit": "seedsperacre", "value": 19460}, + } + ], + "properties": {"Region": "Europe", "CountyCode": 123}, +
"createdDateTime": "2022-05-11T07:00:10.2750191Z", + "modifiedDateTime": "2022-05-11T07:00:10.2750191Z", + "eTag": "cb00a3ac-0000-0100-0000-601d21ec0000", + }, + ] + } + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("vibe_core.admag_client.ADMAgClient._post") +def test_get_harvest_info( + _post: MagicMock, + get_token: MagicMock, + harvest_result: Dict[str, Any], + admag_client: ADMAgClient, + vibe_geometry_dict: Dict[str, Any], +): + _post.return_value = harvest_result + harvest_result = admag_client.get_harvest_info( + party_id="fake-party-id", + intersects_with_geometry=vibe_geometry_dict, + min_start_operation="2001-01-01T00:00:00Z", + max_end_operation="2001-01-01T00:00:00Z", + associated_resource={"type": "SeasonalField", "id": "fake-seasonal-field-id"}, + ) + assert "value" in harvest_result + harvest_list = harvest_result["value"] + assert len(harvest_list) > 0 + harvest_entry = harvest_list[0] + assert "operationStartDateTime" in harvest_entry + assert "operationEndDateTime" in harvest_entry + assert "properties" in harvest_entry + harvest_properties = harvest_entry["properties"] + assert "gfsrt" in harvest_properties + assert "strawStoverHayRemoval" in harvest_properties + assert "totalYield" in harvest_entry + harvest_yield = harvest_entry["totalYield"] + assert "value" in harvest_yield + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("vibe_core.admag_client.ADMAgClient.get_field") +@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") +@patch("vibe_core.admag_client.ADMAgClient.get_season") +@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") +@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") +@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") +@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") +def test_admag_incomplete_fertilizer( + get_organic_amendments_info: MagicMock, +
get_tillage_info: MagicMock, + get_fertilizer_info: MagicMock, + get_harvest_info: MagicMock, + get_season: MagicMock, + get_seasonal_field: MagicMock, + get_field: MagicMock, + get_token: MagicMock, + seasonal_field_info: Dict[str, Any], + field_info: Dict[str, Any], + season_info: Dict[str, Any], + harvest_result: Dict[str, Any], + fertilizer_result: Dict[str, Any], + tillage_result: Dict[str, Any], + omad_result: Dict[str, Any], + fake_input_data: ADMAgSeasonalFieldInput, +): + get_seasonal_field.return_value = seasonal_field_info + get_field.return_value = field_info + get_season.return_value = season_info + get_harvest_info.return_value = harvest_result + get_tillage_info.return_value = tillage_result + get_organic_amendments_info.return_value = omad_result + + fertilizer_missing_total_N = copy.deepcopy(fertilizer_result) + fertilizer_missing_total_N["value"][0]["properties"].pop("totalNitrogen") + get_fertilizer_info.return_value = fertilizer_missing_total_N + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + fertilizer_missing_eep = copy.deepcopy(fertilizer_result) + fertilizer_missing_eep["value"][0]["properties"].pop("eep") + get_fertilizer_info.return_value = fertilizer_missing_eep + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + fertilizer_wrong_eep = copy.deepcopy(fertilizer_result) + fertilizer_wrong_eep["value"][0]["properties"]["eep"] = "fake-eep" + get_fertilizer_info.return_value = fertilizer_wrong_eep + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("vibe_core.admag_client.ADMAgClient.get_field") +@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") +@patch("vibe_core.admag_client.ADMAgClient.get_season") +@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") 
+@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") +@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") +@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") +def test_admag_incomplete_harvest( + get_organic_amendments_info: MagicMock, + get_tillage_info: MagicMock, + get_fertilizer_info: MagicMock, + get_harvest_info: MagicMock, + get_season: MagicMock, + get_seasonal_field: MagicMock, + get_field: MagicMock, + _: MagicMock, + seasonal_field_info: Dict[str, Any], + field_info: Dict[str, Any], + season_info: Dict[str, Any], + harvest_result: Dict[str, Any], + fertilizer_result: Dict[str, Any], + tillage_result: Dict[str, Any], + omad_result: Dict[str, Any], + fake_input_data: ADMAgSeasonalFieldInput, +): + get_seasonal_field.return_value = seasonal_field_info + get_field.return_value = field_info + get_season.return_value = season_info + get_fertilizer_info.return_value = fertilizer_result + get_tillage_info.return_value = tillage_result + get_organic_amendments_info.return_value = omad_result + + # Don't remove this code; it may be required for a different crop + # harvest_missing_gfsrt = copy.deepcopy(harvest_result) + # harvest_missing_gfsrt["value"][0]["properties"].pop("gfsrt") + + # get_harvest_info.return_value = harvest_missing_gfsrt + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + # Don't remove this code; it may be required for a different crop + # harvest_missing_straw_stover_hay_removal = copy.deepcopy(harvest_result) + # harvest_missing_straw_stover_hay_removal["value"][0]["properties"].pop( + # "strawStoverHayRemoval" + # ) + # get_harvest_info.return_value = harvest_missing_straw_stover_hay_removal + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("vibe_core.admag_client.ADMAgClient.get_field")
+@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") +@patch("vibe_core.admag_client.ADMAgClient.get_season") +@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") +@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") +@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") +@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") +def test_admag_incomplete_organic_amendments( + get_organic_amendments_info: MagicMock, + get_tillage_info: MagicMock, + get_fertilizer_info: MagicMock, + get_harvest_info: MagicMock, + get_season: MagicMock, + get_seasonal_field: MagicMock, + get_field: MagicMock, + _: MagicMock, + seasonal_field_info: Dict[str, Any], + field_info: Dict[str, Any], + season_info: Dict[str, Any], + harvest_result: Dict[str, Any], + fertilizer_result: Dict[str, Any], + tillage_result: Dict[str, Any], + omad_result: Dict[str, Any], + fake_input_data: ADMAgSeasonalFieldInput, +): + get_seasonal_field.return_value = seasonal_field_info + get_field.return_value = field_info + get_season.return_value = season_info + get_harvest_info.return_value = harvest_result + get_fertilizer_info.return_value = fertilizer_result + get_tillage_info.return_value = tillage_result + + organic_amendments_missing_type = copy.deepcopy(omad_result) + organic_amendments_missing_type["value"][0]["properties"].pop("type") + get_organic_amendments_info.return_value = organic_amendments_missing_type + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + organic_amendments_missing_amount = copy.deepcopy(omad_result) + organic_amendments_missing_amount["value"][0]["properties"].pop("amount") + get_organic_amendments_info.return_value = organic_amendments_missing_amount + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + organic_amendments_missing_percentN = copy.deepcopy(omad_result) + 
organic_amendments_missing_percentN["value"][0]["properties"].pop("percentN") + get_organic_amendments_info.return_value = organic_amendments_missing_percentN + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + organic_amendments_missing_CNratio = copy.deepcopy(omad_result) + organic_amendments_missing_CNratio["value"][0]["properties"].pop("CNratio") + get_organic_amendments_info.return_value = organic_amendments_missing_CNratio + + with pytest.raises(ValueError): + OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + + +@pytest.fixture +def fertilizer_result() -> Dict[str, Any]: + return { + "value": [ + { + "totalMaterial": {"unit": "tons/ac", "value": 5.0}, + "operationStartDateTime": "2000-01-01T00:00:00Z", + "operationEndDateTime": "2000-01-01T00:00:00Z", + "attachmentsLink": "http://fake-url.com/attachments", + "partyId": "fake-party-id", + "id": "fake-fertilizer-id", + "eTag": "fake-etag", + "createdDateTime": "2021-12-10T00:03:37Z", + "modifiedDateTime": "2021-12-10T00:03:37Z", + "source": "Fertilizer", + "name": "Ammonium Nitrate (34-0-0)", + "properties": { + "eep": "None", + "totalNitrogen": 4.0, + "method": "Surface Band / Sidedress", + }, + } + ], + "nextLink": "https://fake-next-link.com", + } + + +@patch("vibe_core.admag_client.ADMAgClient._post") +def test_get_fertilizer_info( + _post: MagicMock, + fertilizer_result: Dict[str, Any], + admag_client: ADMAgClient, + vibe_geometry_dict: Dict[str, Any], +): + _post.return_value = fertilizer_result + fertilizer_result = admag_client.get_fertilizer_info( + party_id="fake-party-id", + intersects_with_geometry=vibe_geometry_dict, + min_start_operation="2001-01-01T00:00:00Z", + max_end_operation="2001-01-01T00:00:00Z", + associated_resource={"type": "SeasonalField", "id": "fake-seasonal_field-id"}, + ) + assert "value" in fertilizer_result + fertilizer_list = fertilizer_result["value"] + assert len(fertilizer_list) > 0 +
fertilizer_entry = fertilizer_list[0] + assert "operationStartDateTime" in fertilizer_entry + assert "operationEndDateTime" in fertilizer_entry + assert "name" in fertilizer_entry + fertilizer_properties = fertilizer_entry["properties"] + assert "totalNitrogen" in fertilizer_properties + assert "eep" in fertilizer_properties + + +@pytest.fixture +def tillage_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "value": [ + { + "geometry": vibe_geometry_dict, + "attachmentsLink": "fake-attachment-link", + "createdDateTime": "2021-12-10T00:18:33Z", + "eTag": "fake-etag", + "partyId": "fake-party-id", + "id": "fake-tillage-id", + "modifiedDateTime": "2021-12-10T00:18:33Z", + "name": "Fake Tillage", + "operationEndDateTime": "2001-01-01T00:00:00Z", + "operationStartDateTime": "2001-01-01T00:00:00Z", + "source": "fake-source", + "status": "Active", + }, + ] + } + + +@patch("vibe_core.admag_client.ADMAgClient._post") +def test_get_tillage_info( + _post: MagicMock, + tillage_result: Dict[str, Any], + admag_client: ADMAgClient, + vibe_geometry_dict: Dict[str, Any], +): + _post.return_value = tillage_result + tillage_result = admag_client.get_tillage_info( + party_id="fake-party-id", + intersects_with_geometry=vibe_geometry_dict, + min_start_operation="2001-01-01T00:00:00Z", + max_end_operation="2001-01-01T00:00:00Z", + associated_resource={"type": "SeasonalField", "id": "fake-seasonal_field-id"}, + ) + assert "value" in tillage_result + tillage_list = tillage_result["value"] + assert len(tillage_list) > 0 + tillage_entry = tillage_list[0] + assert "operationStartDateTime" in tillage_entry + assert "operationEndDateTime" in tillage_entry + assert "name" in tillage_entry + + +@pytest.fixture +def omad_result(vibe_geometry_dict: Dict[str, Any]) -> Dict[str, Any]: + return { + "value": [ + { + "geometry": vibe_geometry_dict, + "attachmentsLink": "fake-attachment-link", + "createdDateTime": "2021-12-10T00:18:33Z", + "eTag": "fake-etag", + "partyId":
"fake-party-id", + "id": "fake-tillage-id", + "modifiedDateTime": "2021-12-10T00:18:33Z", + "name": "Fake Tillage", + "operationEndDateTime": "2001-01-01T00:00:00Z", + "operationStartDateTime": "2001-01-01T00:00:00Z", + "source": "fake-source", + "status": "Active", + "properties": { + "type": "fake-omad-tyoe", + "amount": "100", + "percentN": "200", + "CNratio": "0.05", + }, + }, + ] + } + + +@patch("vibe_core.admag_client.ADMAgClient._post") +def test_get_organic_amendments_info( + _post: MagicMock, + omad_result: Dict[str, Any], + admag_client: ADMAgClient, + vibe_geometry_dict: Dict[str, Any], +): + _post.return_value = omad_result + omad_result = admag_client.get_organic_amendments_info( + party_id="fake-party-id", + intersects_with_geometry=vibe_geometry_dict, + min_start_operation="2001-01-01T00:00:00Z", + max_end_operation="2001-01-01T00:00:00Z", + associated_resource={"type": "SeasonalField", "id": "fake-seasonal_field-id"}, + ) + assert "value" in omad_result + omad_list = omad_result["value"] + assert len(omad_list) > 0 + omad_entry = omad_list[0] + assert "operationStartDateTime" in omad_entry + assert "operationEndDateTime" in omad_entry + assert "properties" in omad_entry + omad_properties = omad_entry["properties"] + assert "type" in omad_properties + assert "amount" in omad_properties + assert "percentN" in omad_properties + assert "CNratio" in omad_properties + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("vibe_core.admag_client.ADMAgClient.get_field") +@patch("vibe_core.admag_client.ADMAgClient.get_seasonal_field") +@patch("vibe_core.admag_client.ADMAgClient.get_season") +@patch("vibe_core.admag_client.ADMAgClient.get_harvest_info") +@patch("vibe_core.admag_client.ADMAgClient.get_fertilizer_info") +@patch("vibe_core.admag_client.ADMAgClient.get_tillage_info") +@patch("vibe_core.admag_client.ADMAgClient.get_organic_amendments_info") +@patch("vibe_core.admag_client.ADMAgClient.get_planting_info")
+def test_admag_op( + get_planting_info: MagicMock, + get_organic_amendments_info: MagicMock, + get_tillage_info: MagicMock, + get_fertilizer_info: MagicMock, + get_harvest_info: MagicMock, + get_season: MagicMock, + get_seasonal_field: MagicMock, + get_field: MagicMock, + get_token: MagicMock, + seasonal_field_info: Dict[str, Any], + field_info: Dict[str, Any], + season_info: Dict[str, Any], + harvest_result: Dict[str, Any], + fertilizer_result: Dict[str, Any], + tillage_result: Dict[str, Any], + omad_result: Dict[str, Any], + planting_result: Dict[str, Any], + fake_input_data: ADMAgSeasonalFieldInput, +): + get_seasonal_field.return_value = seasonal_field_info + get_field.return_value = field_info + get_season.return_value = season_info + get_harvest_info.return_value = harvest_result + get_fertilizer_info.return_value = fertilizer_result + get_tillage_info.return_value = tillage_result + get_organic_amendments_info.return_value = omad_result + get_planting_info.return_value = planting_result + + output_data = OpTester(ADMAG_SEASONAL_FIELD_OP).run(admag_input=fake_input_data) + assert output_data + + +@pytest.fixture +def vibe_geometry_dict() -> Dict[str, Any]: + farm_boundary = { + "type": "FeatureCollection", + "name": "small_block_new_new", + "crs": { + "type": "name", + "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}, + }, + "features": [ + { + "type": "Feature", + "properties": {"id": 1}, + "geometry": { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [-117.046717186923388, 47.036308491044693], + [-117.04260145498948, 47.036329968998508], + [-117.042643698734992, 47.034569687054848], + [-117.046686589954575, 47.034558181995273], + [-117.046717186923388, 47.036308491044693], + ] + ] + ], + }, + } + ], + } + data_frame = gpd.read_file(json.dumps(farm_boundary), driver="GeoJSON") + + if not data_frame.empty: + geometry = shpg.mapping(data_frame["geometry"][0]) # type: ignore + return geometry + else: + raise Exception("No geometry found in farm 
boundary") + + +@pytest.fixture +def admag_prescriptions() -> Request: + data = { + "value": [ + { + "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", + "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", + "productCode": "1635", + "productName": "Nutrient", + "type": "Nutrient", + "measurements": { + "N": {"value": 47.1}, + "P": {"value": 34.99769206227461}, + "pH": {"value": 4.978131831743143}, + "C": {"value": 0.046408031802193}, + }, + "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", + "eTag": "24009696-0000-0100-0000-65fb20540000", + "status": "Active", + "createdDateTime": "2024-03-20T17:43:48Z", + "modifiedDateTime": "2024-03-20T17:43:48Z", + "source": "IOT device", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-117.03642546099948, 47.044663835752566], + [-117.05642546099949, 47.044663835752566], + [-117.05642546099949, 47.02466383575257], + [-117.03642546099948, 47.02466383575257], + [-117.03642546099948, 47.044663835752566], + ] + ], + }, + "name": "Nitrogen Nutrient", + "description": "", + "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", + "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", + } + ] + } + data = Request(**{"text": json.dumps(data)}) + return data + + +@pytest.fixture +def admag_prescriptions_dict() -> Request: + data = { + "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", + "prescriptionMapId": "831989c4-c15a-4fc5-837b-4c0289d53010", + "productCode": "1635", + "productName": "Nutrient", + "type": "Nutrient", + "measurements": { + "N": {"value": 47.1}, + "P": {"value": 34.99769206227461}, + "pH": {"value": 4.978131831743143}, + "C": {"value": 0.046408031802193}, + }, + "id": "880094d0-1c48-4d7c-b0d3-f7477a937473", + "eTag": "24009696-0000-0100-0000-65fb20540000", + "status": "Active", + "createdDateTime": "2024-03-20T17:43:48Z", + "modifiedDateTime": "2024-03-20T17:43:48Z", + "source": "IOT device", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-117.03642546099948, 47.044663835752566], 
+ [-117.05642546099949, 47.044663835752566], + [-117.05642546099949, 47.02466383575257], + [-117.03642546099948, 47.02466383575257], + [-117.03642546099948, 47.044663835752566], + ] + ], + }, + "name": "Nitrogen Nutrient", + "description": "", + "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", + "modifiedBy": "f8c6c349-b484-4863-af76-d10eee669306", + } + + data = Request(**{"text": json.dumps(data)}) + return data + + +@pytest.fixture +def admag_get_field_info() -> Request: + data = { + "fieldId": "63c94ae9-b0b6-46b7-8e65-311b9b44191f", + "cropId": "ae600a8a-3011-4d7c-8146-1f039ba619d0", + "seasonId": "ae600a8a-3011-4d7c-8146-1f039ba619d0", + "createdDateTime": "2021-03-21T01:37:06Z", + "modifiedDateTime": "2021-03-21T01:37:06Z", + "seasonal_field_id": "", + } + + data = Request(**{"text": json.dumps(data)}) + return data + + +@pytest.fixture +def admag_get_prescription_map_id() -> Request: + data = { + "value": [ + { + "partyId": "ae880a1b-4597-46d7-83ac-bfc6a1ae4116-16", + "type": "Soil Nutrient Map", + "seasonId": "ae600a8a-3011-4d7c-8146-1f039ba619d0-16", + "cropId": "d4c8427b-4540-4c05-82f6-27c771e48b7c", + "fieldId": "04b1d9f6-7444-4df5-b468-9a4e4c96314e-16", + "id": "831989c4-c15a-4fc5-837b-4c0289d53050", + "eTag": "8400e17b-0000-0100-0000-660075240000", + "status": "Active", + "createdDateTime": "2024-03-21T14:48:27Z", + "modifiedDateTime": "2024-03-24T18:47:00Z", + "source": "IOT devices", + "name": "Prescription test Map", + "description": "Farmbeats Agriculture research", + "createdBy": "f8c6c349-b484-4863-af76-d10eee669306", + "modifiedBy": "255a13c4-c1e0-4ac9-9e60-5139b3f8e0a3", + "properties": {"seasonal_field_id": "fake-seasonal-field-id"}, + } + ] + } + data = Request(**{"text": json.dumps(data)}) + return data + + +@pytest.fixture +def admag_seasonal_field_info(seasonal_field_info: Dict[str, Any]) -> Request: + data = Request(**{"text": json.dumps(seasonal_field_info)}) + return data + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", 
return_value="my_fake_token") +@patch("requests.Session.request") +def test_prescriptions( + session_mock: Mock, + _: MagicMock, + admag_prescriptions: Request, + admag_seasonal_field_info: Request, + fake_input_data: ADMAgSeasonalFieldInput, + prescription_geom_input: List[ADMAgPrescription], +): + session_mock.side_effect = [ + admag_seasonal_field_info, + admag_prescriptions, + ] + parameters = { + "base_url": "base_url", + "client_id": "client_id", + "client_secret": "client_secret", + "authority": "authority", + "default_scope": "default_scope", + } + CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prescriptions.yaml") + op_ = OpTester(CONFIG_PATH) + op_.update_parameters(parameters) + output_data = op_.run( + admag_input=fake_input_data, + prescriptions_with_geom_input=prescription_geom_input, # type: ignore + ) + assets = cast(List[AssetVibe], output_data["response"].assets) # type: ignore + assert len(assets[0].path_or_url) > 0 + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("requests.Session.request") +def test_list_prescriptions( + session_mock: Mock, + _: MagicMock, + admag_prescriptions: Request, + admag_get_prescription_map_id: Request, + admag_seasonal_field_info: Request, + fake_input_data: ADMAgSeasonalFieldInput, +): + session_mock.side_effect = [ + admag_seasonal_field_info, + admag_get_prescription_map_id, + admag_prescriptions, + ] + parameters = { + "base_url": "base_url", + "client_id": "client_id", + "client_secret": "client_secret", + "authority": "authority", + "default_scope": "default_scope", + } + CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "list_prescriptions.yaml" + ) + op_ = OpTester(CONFIG_PATH) + op_.update_parameters(parameters) + output_data = op_.run(admag_input=fake_input_data) + assert "prescriptions" in output_data + + +@patch("vibe_core.admag_client.ADMAgClient.get_token", return_value="my_fake_token") +@patch("requests.Session.request") +def
test_get_prescriptions( + session_mock: Mock, + _: MagicMock, + admag_prescriptions_dict: Request, + fake_prescription_input_data: ADMAgPrescriptionInput, +): + session_mock.side_effect = [ + admag_prescriptions_dict, + ] + parameters = { + "base_url": "base_url", + "client_id": "client_id", + "client_secret": "client_secret", + "authority": "authority", + "default_scope": "default_scope", + } + CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "get_prescription.yaml") + op_ = OpTester(CONFIG_PATH) + op_.update_parameters(parameters) + output_data = op_.run(prescription_without_geom_input=fake_prescription_input_data) + assert "prescription_with_geom" in output_data diff --git a/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml b/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml new file mode 100644 index 00000000..1d76494d --- /dev/null +++ b/ops/aggregate_statistics_timeseries/aggregate_statistics_timeseries.yaml @@ -0,0 +1,15 @@ +name: aggregate_statistics_timeseries +inputs: + stats: List[DataSummaryStatistics] +output: + timeseries: List[TimeSeries] +parameters: + masked_thr: .8 +entrypoint: + file: aggregate_timeseries.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - masked_thr +description: + short_description: Aggregates list of summary statistics into a timeseries.
\ No newline at end of file diff --git a/ops/aggregate_statistics_timeseries/aggregate_timeseries.py b/ops/aggregate_statistics_timeseries/aggregate_timeseries.py new file mode 100644 index 00000000..d353fb3b --- /dev/null +++ b/ops/aggregate_statistics_timeseries/aggregate_timeseries.py @@ -0,0 +1,51 @@ +import os +from tempfile import TemporaryDirectory +from typing import Dict, List, cast + +import pandas as pd + +from vibe_core.data import AssetVibe, DataSummaryStatistics, TimeSeries, gen_guid + + +class CallbackBuilder: + def __init__(self, masked_thr: float): + self.tmp_dir = TemporaryDirectory() + self.masked_thr = masked_thr + + def __call__(self): + def callback(stats: List[DataSummaryStatistics]) -> Dict[str, List[TimeSeries]]: + df = pd.concat( + cast( + List[pd.DataFrame], + [ + pd.read_csv(s.assets[0].url, index_col="date", parse_dates=True) + for s in stats + ], + ) + ) + assert df is not None, "DataFrame is None, that should not happen" + # Filter out items above threshold + df = cast(pd.DataFrame, df[df["masked_ratio"] <= self.masked_thr]) # type: ignore + if df.empty: + raise RuntimeError( + f"No available data with less than {self.masked_thr:.1%} masked data" + ) + df.sort_index(inplace=True) + guid = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") + df.to_csv(filepath) + min_date = df.index.min().to_pydatetime() # type: ignore + max_date = df.index.max().to_pydatetime() # type: ignore + timeseries = TimeSeries( + gen_guid(), + time_range=(min_date, max_date), # type: ignore + geometry=stats[0].geometry, + assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], + ) + + return {"timeseries": [timeseries]} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/carbon_local/test_whatif.py b/ops/carbon_local/test_whatif.py new file mode 100644 index 00000000..edab312e --- /dev/null +++ b/ops/carbon_local/test_whatif.py @@ -0,0 +1,298 @@ +import os +from datetime import datetime +from 
typing import List +from unittest.mock import Mock, patch + +import pytest +from pyngrok.exception import PyngrokError + +from vibe_core.data import CarbonOffsetInfo, SeasonalFieldInformation +from vibe_dev.testing.op_tester import OpTester + + +@pytest.fixture +def baseline_information(): + field_info = [ + { + "id": "25e96fa0-9cf8-4b31-ac9e-24e30c37aeaf", + "time_range": [ + datetime(year=2020, month=2, day=15), + datetime(year=2023, month=9, day=15), + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-87.414918, 37.463251], + [-87.399025, 37.470226], + [-87.393499, 37.472569], + [-87.39827, 37.479898], + [-87.405993, 37.478046], + [-87.407538, 37.47761], + [-87.408122, 37.477501], + [-87.408636, 37.477092], + [-87.409048, 37.476602], + [-87.414918, 37.463251], + ] + ], + }, + "assets": [], + "crop_name": "Alfalfa", + "crop_type": "annual", + "properties": { + "pre_1980": "Lowland Non-Irrigated (Pre 1980s)", + "crp_type": "None", + "crp_start": "", + "crp_end": "", + "year_1980_2000": "Irrigated: Continuous Hay", + "year_1980_2000_tillage": "Intensive Tillage", + }, + "fertilizers": [], + "harvests": [ + { + "is_grain": True, + "start_date": "2000-09-05T00:00:00Z", + "end_date": "2000-09-05T00:00:00Z", + "crop_yield": 39.0, + "stray_stover_hay_removal": "0", + }, + ], + "tillages": [ + { + "start_date": "2000-01-01T00:00:00Z", + "end_date": "2000-01-01T00:00:00Z", + "implement": "Reduced Tillage", + } + ], + "organic_amendments": [], + } + ] + + fi = [SeasonalFieldInformation(**item) for item in field_info] + return fi + + +@pytest.fixture +def scenario_information(): + field_info = [ + { + "id": "0e16be1a-eb0f-4b55-a69c-4fa79af8f406", + "time_range": [ + datetime(year=2023, month=2, day=15), + datetime(year=2025, month=9, day=15), + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-87.414918, 37.463251], + [-87.399025, 37.470226], + [-87.393499, 37.472569], + [-87.39827, 37.479898], + [-87.405993, 37.478046], + [-87.407538, 
37.47761], + [-87.408122, 37.477501], + [-87.408636, 37.477092], + [-87.409048, 37.476602], + [-87.414918, 37.463251], + ] + ], + }, + "assets": [], + "crop_name": "Barley", + "crop_type": "annual", + "properties": { + "pre_1980": "Lowland Non-Irrigated (Pre 1980s)", + "crp_type": "None", + "crp_start": "", + "crp_end": "", + "year_1980_2000": "Irrigated: Continuous Hay", + "year_1980_2000_tillage": "Intensive Tillage", + }, + "fertilizers": [], + "harvests": [ + { + "is_grain": True, + "start_date": "2023-11-11T00:00:00Z", + "end_date": "2023-11-11T00:00:00Z", + "crop_yield": 30.0, + "stray_stover_hay_removal": "0", + } + ], + "tillages": [ + { + "start_date": "2023-01-01T00:00:00Z", + "end_date": "2023-01-01T00:00:00Z", + "implement": "Zero Soil Disturbance", + } + ], + "organic_amendments": [], + } + ] + + fi = [SeasonalFieldInformation(**item) for item in field_info] + return fi + + +@pytest.fixture +def fake_comet_error(): + return { + "Errors": { + "ModelRun": { + "@name": "sdk_int1", + "Error": { + "@index": "0", + "@message": "You entered 200 in tag OMADPercentN for " + "CropYear: 2000 and CropScenario: Current " + ".Percent Nitrogen needs to between 0 and 100", + }, + } + } + } + + +@pytest.fixture +def fake_comet_response(): + return { + "Day": { + "@cometEmailId": "fake-email", + "@CFARMVersion": "appengine cometfarm v0-10 build 3.2.8472.37261 (03/13/2023)", + "Cropland": { + "ModelRun": { + "@name": "sdk_int1", + "Scenario": [ + { + "@name": "scenario: 17/03/2023 16:00:01", + "Carbon": { + "SoilCarbon": "1234.4321", + "BiomassBurningCarbon": "0", + "SoilCarbonStock2000": "1234.4321", + "SoilCarbonStockBegin": "1234.4321", + "SoilCarbonStockEnd": "1234.4321", + }, + "CO2": { + "LimingCO2": "0", + "UreaFertilizationCO2": "0", + "DrainedOrganicSoilsCO2": "0", + }, + "N2O": { + "SoilN2O": "1234.4321", + "SoilN2O_Direct": "1234.4321", + "SoilN2O_Indirect_Volatilization": "1234.4321", + "SoilN2O_Indirect_Leaching": "1234.4321", + "WetlandRiceCultivationN2O": 
"0", + "BiomassBurningN2O": "0", + "DrainedOrganicSoilsN2O": "0", + }, + "CH4": { + "SoilCH4": "0", + "WetlandRiceCultivationCH4": "0", + "BiomassBurningCH4": "0", + }, + } + ], + } + }, + } + } + + +@patch("http.server.HTTPServer.server_bind") +@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start_ngrok") +@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start") +@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.shutdown") +@patch("vibe_lib.comet_farm.comet_requester.CometRequester.get_comet_raw_output") +@patch("vibe_lib.comet_farm.comet_requester.CometRequester.parse_comet_response") +def test_whatif_request( + parse_comet_response: Mock, + _: Mock, + __: Mock, + ___: Mock, + ____: Mock, + _____: Mock, + baseline_information: List[SeasonalFieldInformation], + scenario_information: List[SeasonalFieldInformation], + fake_comet_response: str, +): + CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "whatif_comet_local_op.yaml" + ) + parse_comet_response.return_value = fake_comet_response + parameters = { + "comet_support_email": "fake_email", + "ngrok_token": "fake_ngrok", + } + + op_ = OpTester(CONFIG_PATH) + op_.update_parameters(parameters) + + output_data = op_.run( + # pyright misidentifies types here + baseline_seasonal_fields=baseline_information, # type: ignore + scenario_seasonal_fields=scenario_information, # type: ignore + ) + + assert "carbon_output" in output_data + assert isinstance(output_data["carbon_output"], CarbonOffsetInfo) + assert "Mg Co2e/year" in output_data["carbon_output"].carbon + + +@patch("http.server.HTTPServer.server_bind") +@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start_ngrok") +@patch("vibe_lib.comet_farm.comet_server.CometHTTPServer.start") +@patch("vibe_lib.comet_farm.comet_requester.CometRequester.get_comet_raw_output") +@patch("vibe_lib.comet_farm.comet_requester.CometRequester.parse_comet_response") +def test_whatif_request_comet_error( + parse_comet_response: Mock, 
@patch("pyngrok.ngrok.set_auth_token")
def test_whatif_start_ngrok_error(
    set_auth_token: Mock,
    baseline_information: List[SeasonalFieldInformation],
    scenario_information: List[SeasonalFieldInformation],
):
    """
    The op must fail when ngrok cannot be configured: `set_auth_token` is
    patched to raise `PyngrokError` and running the op is expected to raise.
    """
    set_auth_token.side_effect = PyngrokError("Fake Error")

    op_dir = os.path.dirname(os.path.abspath(__file__))
    op_tester = OpTester(os.path.join(op_dir, "whatif_comet_local_op.yaml"))
    op_tester.update_parameters(
        {
            "comet_support_email": "fake_email",
            "ngrok_token": "fake_ngrok",
        }
    )

    # The exact exception type surfaced by the op is not pinned by this test
    with pytest.raises(Exception):
        op_tester.run(
            # pyright misidentifies types here
            baseline_seasonal_fields=baseline_information,  # type: ignore
            scenario_seasonal_fields=scenario_information,  # type: ignore
        )
class SeasonalFieldConverter:
    """
    Converts lists of `SeasonalFieldInformation` into the XML document that is
    submitted to COMET-Farm (one baseline scenario plus one what-if scenario).
    """

    def get_location(self, geojson: Dict[str, Any]):
        """
        calculate area and center point of polygon

        Returns a tuple `(area_in_acres, (centroid_x, centroid_y))`.
        """
        s = shape(geojson)

        location = (s.centroid.x, s.centroid.y)  # type: ignore

        # Ellipsoid given by semi-major axis and flattening
        # (presumably WGS84 — the values match it to ~1e-16)
        geod = Geod("+a=6378137 +f=0.0033528106647475126")
        # pyproj reports a *signed* area (negative for clockwise rings), so take
        # the magnitude before converting square meters to acres
        area_in_acres = abs(geod.geometry_area_perimeter(s)[0]) * 0.000247105

        return (area_in_acres, location)

    def format_datetime(self, date: str) -> str:
        """Convert a `YYYY-MM-DDTHH:MM:SSZ` timestamp into `MM/DD/YYYY`."""
        date_obj = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
        return date_obj.strftime("%m/%d/%Y")

    def _add_historical(self, historical_data: Dict[str, Any], cropland: ET.Element):
        """Append the historical land-management elements to `cropland`."""
        ET.SubElement(cropland, "Pre-1980").text = historical_data["pre_1980"]
        # NOTE(review): both <CRP> and <CRPType> are filled from "crp_type";
        # kept as in the original — confirm this is what COMET expects
        ET.SubElement(cropland, "CRP").text = historical_data["crp_type"]
        ET.SubElement(cropland, "CRPStartYear").text = historical_data["crp_start"]
        ET.SubElement(cropland, "CRPEndYear").text = historical_data["crp_end"]
        ET.SubElement(cropland, "CRPType").text = historical_data["crp_type"]
        ET.SubElement(cropland, "Year1980-2000").text = historical_data["year_1980_2000"]
        ET.SubElement(cropland, "Year1980-2000_Tillage").text = historical_data[
            "year_1980_2000_tillage"
        ]

    def _add_harvest_information(self, harvest_data: HarvestInformation, harvest_list: ET.Element):
        """Append one `HarvestEvent` to `harvest_list` (dict input is accepted)."""
        if isinstance(harvest_data, dict):
            harvest_data = HarvestInformation(**harvest_data)
        harvest = ET.SubElement(harvest_list, "HarvestEvent")

        ET.SubElement(harvest, "HarvestDate").text = self.format_datetime(harvest_data.end_date)
        ET.SubElement(harvest, "Grain").text = "Yes" if harvest_data.is_grain else "No"
        ET.SubElement(harvest, "yield").text = str(harvest_data.crop_yield)
        ET.SubElement(harvest, "StrawStoverHayRemoval").text = str(
            harvest_data.stray_stover_hay_removal
        )

    def _add_tillage_information(self, tillage_data: TillageInformation, tillage_list: ET.Element):
        """Append one `TillageEvent` to `tillage_list` (dict input is accepted)."""
        if isinstance(tillage_data, dict):
            tillage_data = TillageInformation(**tillage_data)
        tillage = ET.SubElement(tillage_list, "TillageEvent")
        ET.SubElement(tillage, "TillageDate").text = self.format_datetime(tillage_data.end_date)
        ET.SubElement(tillage, "TillageType").text = tillage_data.implement

    def _add_fertilization_information(
        self, fertilizer_data: FertilizerInformation, fertilization_list: ET.Element
    ):
        """Append one `NApplicationEvent` to `fertilization_list` (dict input is accepted)."""
        if isinstance(fertilizer_data, dict):
            fertilizer_data = FertilizerInformation(**fertilizer_data)
        fertilizer = ET.SubElement(fertilization_list, "NApplicationEvent")
        fertilizer_date = self.format_datetime(fertilizer_data.end_date)
        ET.SubElement(fertilizer, "NApplicationDate").text = fertilizer_date
        ET.SubElement(fertilizer, "NApplicationType").text = fertilizer_data.application_type
        ET.SubElement(fertilizer, "NApplicationAmount").text = str(fertilizer_data.total_nitrogen)
        # The application method is always reported with this fixed value
        ET.SubElement(fertilizer, "NApplicationMethod").text = "Surface Band / Sidedress"
        ET.SubElement(fertilizer, "EEP").text = fertilizer_data.enhanced_efficiency_phosphorus

    def _add_organic_amendmentes_information(
        self, omad_data: OrganicAmendmentInformation, omad_list: ET.Element
    ):
        """Append one `OMADApplicationEvent` to `omad_list` (dict input is accepted)."""
        if isinstance(omad_data, dict):
            omad_data = OrganicAmendmentInformation(**omad_data)
        omadevent = ET.SubElement(omad_list, "OMADApplicationEvent")
        ET.SubElement(omadevent, "OMADApplicationDate").text = self.format_datetime(
            omad_data.end_date
        )
        ET.SubElement(omadevent, "OMADType").text = omad_data.organic_amendment_type
        ET.SubElement(omadevent, "OMADAmount").text = str(omad_data.organic_amendment_amount)
        ET.SubElement(omadevent, "OMADPercentN").text = str(
            omad_data.organic_amendment_percent_nitrogen
        )
        ET.SubElement(omadevent, "OMADCNRatio").text = str(
            omad_data.organic_amendment_carbon_nitrogen_ratio
        )

    def _add_seasonal_field(
        self, seasonal_field: SeasonalFieldInformation, year: ET.Element, crop_number: int
    ):
        """
        Append a `Crop` element for `seasonal_field` to the `year` element.

        `crop_number` is the zero-based position of the field within its year.
        """
        crop = ET.SubElement(year, "Crop")
        # According to COMET documentation crop numbers
        # can be only 1, 2 or -1 if cover
        crop_number = min(crop_number + 1, 2)
        crop.attrib["CropNumber"] = (
            "-1" if "cover" in seasonal_field.crop_type.lower() else str(crop_number)
        )
        ET.SubElement(crop, "CropName").text = seasonal_field.crop_name
        # We assume SeasonalField.time_range = (plantingDate, lastHarvestDate)
        ET.SubElement(crop, "PlantingDate").text = seasonal_field.time_range[0].strftime("%m/%d/%Y")
        ET.SubElement(crop, "ContinueFromPreviousYear").text = "N"

        harvest_list = ET.SubElement(crop, "HarvestList")
        for harvest_data in seasonal_field.harvests:
            self._add_harvest_information(harvest_data, harvest_list)

        # Grazing is not modelled: the list is emitted empty
        ET.SubElement(crop, "GrazingList")

        tillage_list = ET.SubElement(crop, "TillageList")
        for tillage_data in seasonal_field.tillages:
            self._add_tillage_information(tillage_data, tillage_list)

        fertilizer_list = ET.SubElement(crop, "NApplicationList")
        for fertilizer_data in seasonal_field.fertilizers:
            self._add_fertilization_information(fertilizer_data, fertilizer_list)

        omad_application_list = ET.SubElement(crop, "OMADApplicationList")
        for omad_data in seasonal_field.organic_amendments:
            self._add_organic_amendmentes_information(omad_data, omad_application_list)

        # Irrigation is not modelled: the list is emitted empty
        ET.SubElement(crop, "IrrigationList")

    def _add_scenario(self, seasonal_fields: List[SeasonalFieldInformation], scenario: ET.Element):
        """
        Append one `CropYear` element per planting year (ascending) to
        `scenario`, each holding the crops planted that year in input order.

        Raises:
            ValueError: if `seasonal_fields` is empty.
        """
        if not seasonal_fields:
            raise ValueError("At least one seasonal field is required to build a crop scenario")

        # Group once by planting year instead of rescanning the list per year
        fields_by_year: Dict[int, List[SeasonalFieldInformation]] = {}
        for seasonal_field in seasonal_fields:
            fields_by_year.setdefault(seasonal_field.time_range[0].year, []).append(seasonal_field)

        for crop_year in sorted(fields_by_year):
            year_element = ET.SubElement(scenario, "CropYear")
            year_element.attrib["Year"] = str(crop_year)
            for crop_number, seasonal_field in enumerate(fields_by_year[crop_year]):
                self._add_seasonal_field(seasonal_field, year_element, crop_number)

    def build_comet_request(
        self,
        support_email: str,
        baseline_seasonal_fields: List[SeasonalFieldInformation],
        scenario_seasonal_fields: List[SeasonalFieldInformation],
    ) -> str:
        """
        Build the COMET request XML and return it as a unicode string.

        The geometry and historical properties are taken from the first
        baseline field. The baseline fields form the "Current" scenario and the
        scenario fields form a second, timestamp-named scenario.

        Raises:
            IndexError: if `baseline_seasonal_fields` is empty.
            ValueError: if either list of fields yields an empty scenario.
        """
        # NOTE(review): the original parsed an empty string here, which cannot
        # produce a root element (ParseError). COMET-Farm requests are believed
        # to use a <Day> root — confirm against the COMET-Farm API samples.
        root = ET.Element("Day")
        root.attrib["cometEmailId"] = support_email

        cropland = ET.SubElement(root, "Cropland")
        cropland.attrib["name"] = "sdk_int1"

        # Baseline field
        baseline_field = baseline_seasonal_fields[0]

        # cropland elements
        area_in_acres, (center_x, center_y) = self.get_location(baseline_field.geometry)

        geom = ET.SubElement(cropland, "GEOM")
        geom.attrib["SRID"] = "4326"
        geom.attrib["AREA"] = str(area_in_acres)
        geom.text = f"POINT({center_x} {center_y})"

        self._add_historical(baseline_field.properties, cropland)

        scenario = ET.SubElement(cropland, "CropScenario")
        scenario.attrib["Name"] = "Current"
        self._add_scenario(seasonal_fields=baseline_seasonal_fields, scenario=scenario)

        scenario = ET.SubElement(cropland, "CropScenario")
        scenario.attrib["Name"] = "scenario: " + datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        self._add_scenario(seasonal_fields=scenario_seasonal_fields, scenario=scenario)

        return ET.tostring(root, encoding="unicode")
datetime.now(timezone.utc) + self.end_date = datetime.now(timezone.utc) + + def get_carbon_offset( + self, + baseline_seasonal_fields: List[SeasonalFieldInformation], + scenario_seasonal_fields: List[SeasonalFieldInformation], + ) -> Dict[str, CarbonOffsetInfo]: + converter = SeasonalFieldConverter() + xml_str = converter.build_comet_request( + self.cometRequest.supportEmail, baseline_seasonal_fields, scenario_seasonal_fields + ) + + comet_response = self.comet_requester.run_comet_request(xml_str) + + obj_carbon = CarbonOffsetInfo( + id=gen_guid(), + geometry=scenario_seasonal_fields[-1].geometry, + time_range=( + baseline_seasonal_fields[0].time_range[0], + scenario_seasonal_fields[-1].time_range[1], + ), + assets=[], + carbon=comet_response, + ) + + return {"carbon_output": obj_carbon} + + def __call__(self): + return self.get_carbon_offset + + def __del__(self): + try: + ngrok.kill() + except Exception: + pass diff --git a/ops/carbon_local/whatif_comet_local_op.yaml b/ops/carbon_local/whatif_comet_local_op.yaml new file mode 100644 index 00000000..6a2bb593 --- /dev/null +++ b/ops/carbon_local/whatif_comet_local_op.yaml @@ -0,0 +1,18 @@ +name: whatif_comet_op +inputs: + baseline_seasonal_fields: List[SeasonalFieldInformation] + scenario_seasonal_fields: List[SeasonalFieldInformation] +output: + carbon_output: CarbonOffsetInfo +parameters: + comet_url: "https://comet-farm.com/ApiMain/AddToQueue" + comet_support_email: + ngrok_token: +entrypoint: + file: whatif_comet_local.py + callback_builder: CallbackBuilder +version: 2 +description: + short_description: + Computes the offset amount of carbon that would be sequestered in a seasonal field using the + baseline (historical) and scenario (time range interested in) information. 
def get_geometry(limits: ChunkLimits, ref: xr.DataArray) -> Dict[str, Any]:
    """
    return geojson with the geometry of the particular chunk

    `limits` is unpacked into a rasterio `Window` and converted to bounds with
    the transform of `ref`. When `ref` has a CRS other than EPSG:4326 the
    resulting box is reprojected to EPSG:4326.
    """
    window_bounds = bounds(Window(*limits), ref.rio.transform())  # type: ignore
    chunk_box = shpg.box(*window_bounds)

    # Nothing to reproject when the CRS is unknown or already lat/lon
    if ref.rio.crs is None or str(ref.rio.crs) == "EPSG:4326":
        return mapping(chunk_box)

    source_crs = pyproj.CRS(str(ref.rio.crs))
    target_crs = pyproj.CRS("EPSG:4326")
    to_lon_lat = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True).transform
    return mapping(transform(to_lon_lat, chunk_box))
def meshgrid_1d_array(
    y: NDArray[np.int_], x: NDArray[np.int_]
) -> Tuple[NDArray[np.int_], NDArray[np.int_]]:
    """
    Return the flattened "ij"-indexed meshgrid of `y` and `x`.

    Both outputs have `len(y) * len(x)` entries; the first repeats each value
    of `y` for every value of `x`, the second cycles through `x`.
    """
    grid_y, grid_x = np.meshgrid(y, x, indexing="ij")
    return (grid_y.reshape(-1), grid_x.reshape(-1))
class CallbackBuilder:
    """Builds the callback that splits rasters into chunks of a given step size."""

    def __init__(self, step_y: int, step_x: int):
        self.step_y = step_y
        self.step_x = step_x

    def __call__(self):
        def chunk_callback(
            rasters: Union[List[Raster], RasterSequence],
        ) -> Dict[str, List[RasterChunk]]:
            """
            Chunk either a list of rasters (sorted from latest to earliest
            start date) or a raster sequence (one raster per ordered asset).
            """
            if isinstance(rasters, RasterSequence):
                raster_list = [
                    Raster.clone_from(rasters, gen_guid(), assets=[asset])
                    for asset in rasters.get_ordered_assets()  # type: ignore
                ]
            else:
                raster_list = sorted(rasters, key=lambda r: r.time_range[0], reverse=True)

            # the first raster is the reference for shape and for (later) to warp all images
            reference = raster_list[0]
            reference_shape = rioxarray.open_rasterio(
                reference.raster_asset.path_or_url
            ).shape  # type: ignore

            return {
                "chunk_series": make_chunks(
                    reference_shape, self.step_y, self.step_x, raster_list
                )
            }

        return chunk_callback
class CallbackBuilder:
    """
    Builds the callback that clips a raster to the geometry of a reference item.

    With `hard_clip` the raster data is clipped and written to a new file;
    otherwise only the geometry of the output is updated (soft clip).
    """

    def __init__(self, hard_clip: bool):
        self.tmp_dir = TemporaryDirectory()
        self.hard_clip = hard_clip

    def __call__(self):
        def operator_callback(input_item: DataVibe, raster: T) -> Dict[str, T]:
            reference_geom = shpg.shape(input_item.geometry)
            raster_geom = shpg.shape(raster.geometry)

            if not raster_geom.intersects(reference_geom):
                raise ValueError(
                    "Input reference geometry does not intersect with raster geometry."
                )

            overlap = raster_geom.intersection(reference_geom)

            if self.hard_clip:
                data = cast(xr.DataArray, rio.open_rasterio(raster.raster_asset.path_or_url))
                clipped_path = os.path.join(self.tmp_dir.name, "clip.tif")
                clipped = data.rio.clip([overlap], crs="EPSG:4326", from_disk=True)
                clipped.rio.to_raster(clipped_path)
                clipped_asset = AssetVibe(
                    reference=clipped_path, type="image/tiff", id=gen_guid()
                )
                # Swap the original raster asset for the clipped one, keep the rest
                out_assets = raster.assets.copy()
                out_assets.remove(raster.raster_asset)
                out_assets.append(clipped_asset)
                out_id = gen_hash_id(f"{raster.id}_hard_clip", overlap, raster.time_range)
            else:
                out_assets = raster.assets
                out_id = gen_hash_id(f"{raster.id}_soft_clip", overlap, raster.time_range)

            clipped_raster = type(raster).clone_from(
                raster,
                id=out_id,
                geometry=shpg.mapping(overlap),
                assets=out_assets,
            )
            return {"clipped_raster": clipped_raster}

        return operator_callback

    def __del__(self):
        self.tmp_dir.cleanup()
def get_structure_and_meta(
    chunks: List[RasterChunk],
) -> Tuple[Dict[Tuple[int, int], Any], Dict[str, Any]]:
    """
    Index the chunks by position and derive the rasterio metadata of the
    combined raster.

    Returns a dict mapping `chunk_pos` to `{"chunk", "write_limits"}` and the
    metadata of the chunk at position (0, 0), updated with the full width and
    height, write mode and compression settings.
    """
    chunk_index = {
        c.chunk_pos: dict(
            chunk=c, write_limits=get_abs_write_limits(c.limits, c.write_rel_limits)
        )
        for c in chunks
    }

    first_chunk = chunk_index[(0, 0)]["chunk"]
    with rasterio.open(first_chunk.raster_asset.path_or_url) as src:
        meta = src.meta

    # num_chunks is used in the same order as chunk_pos
    # (presumably rows first, then columns — verify against the chunking op)
    count_0, count_1 = first_chunk.num_chunks
    last_limits = chunk_index[(count_0 - 1, count_1 - 1)]["write_limits"]

    # The far edge of the last chunk gives the size of the whole raster:
    # horizontal offset + width, vertical offset + height
    meta["width"] = last_limits[0] + last_limits[2]
    meta["height"] = last_limits[1] + last_limits[3]
    meta["mode"] = "w"

    is_float = "float" in meta["dtype"].lower()
    meta.update(FLOAT_COMPRESSION_KWARGS if is_float else INT_COMPRESSION_KWARGS)
    return chunk_index, meta
class CallbackBuilder:
    """Builds the callback that merges a series of chunks into a single raster."""

    def __init__(self):
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def combine_chunks_callback(chunks: List[RasterChunk]) -> Dict[str, Raster]:
            structure, meta = get_structure_and_meta(chunks)
            tif_path, tif_bounds = get_combined_tif_and_bounds(
                structure, meta, self.tmp_dir.name
            )

            asset = AssetVibe(
                reference=tif_path, type=mimetypes.types_map[".tif"], id=gen_guid()
            )
            # The output id is a hash of the concatenated chunk ids
            joined_ids = "".join(chunk.id for chunk in chunks)
            raster_id = hashlib.sha256(joined_ids.encode()).hexdigest()

            footprint = shpg.box(*tif_bounds)
            crs = meta.get("crs")
            if crs is None:
                LOGGER.warning(
                    "Could not find projected coordinate system for combined raster,"
                    " using geometry as is"
                )
            else:
                # Output geometries are expressed in EPSG:4326
                footprint = gpd.GeoSeries(footprint, crs=crs).to_crs("epsg:4326").iloc[0]

            first_chunk = chunks[0]
            combined = Raster(
                id=raster_id,
                time_range=first_chunk.time_range,
                geometry=shpg.mapping(footprint),
                assets=[asset],
                bands=first_chunk.bands,
            )
            return {"raster": combined}

        return combine_chunks_callback

    def __del__(self):
        self.tmp_dir.cleanup()
def softmax(x: NDArray[Any]) -> NDArray[Any]:
    """
    Compute softmax values for each sets of scores in x.

    The softmax is taken along axis 1 and the output has the same shape as
    `x`. The maximum along that axis is subtracted before exponentiating to
    avoid overflow; this does not change the result.
    """
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    # keepdims keeps the normalizer aligned with axis 1; without it the
    # division only broadcasts correctly when the leading dimension is 1
    return exp_x / exp_x.sum(axis=1, keepdims=True)
S2ProcessingLevel.L2A, prefix="Cloud model") + + if self.downsampling < 1: + raise ValueError( + f"Downsampling must be equal or larger than 1, found {self.downsampling}" + ) + model_path = os.path.join(self.root_dir, self.model_path) + model = ort.InferenceSession(model_path) + chip_size = self.window_size + step_size = int(chip_size * (1 - self.overlap)) + dataset = ChipDataset( + [sentinel_raster], + chip_size=Dims(chip_size, chip_size, 1), + step_size=Dims(step_size, step_size, 1), + downsampling=self.downsampling, + nodata=DEFAULT_NODATA, + reader=InMemoryReader(self.downsampling) if self.in_memory else None, + ) + + dataloader = get_loader( + dataset, self.batch_size, self.num_workers if not self.in_memory else 0 + ) + pred_filepaths = predict_chips( + model, + dataloader, + self.tmp_dir.name, + skip_nodata=True, + post_process=post_process, + ) + assert ( + len(pred_filepaths) == 1 + ), f"Expected one prediction file, found: {len(pred_filepaths)}" + mask_filepath = resample_raster( + pred_filepaths[0], + self.tmp_dir.name, + dataset.width, + dataset.height, + dataset.transform, + Resampling.bilinear, + ) + asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid()) + + cloud_mask = Sentinel2CloudProbability.clone_from( + sentinel_raster, id=gen_guid(), assets=[asset] + ) + + return {"cloud_probability": cloud_mask} + + return compute_cloud_prob + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_cloud_prob/compute_cloud_prob.yaml b/ops/compute_cloud_prob/compute_cloud_prob.yaml new file mode 100644 index 00000000..c7cc6b06 --- /dev/null +++ b/ops/compute_cloud_prob/compute_cloud_prob.yaml @@ -0,0 +1,25 @@ +name: compute_cloud_prob +inputs: + sentinel_raster: Sentinel2Raster +output: + cloud_probability: Sentinel2CloudProbability +parameters: + downsampling: 1 + root_dir: /opt/terravibes/ops/resources/cloud_models + model_path: cloud_model2_cpu.onnx + window_size: 512 + overlap: .25 + batch_size: 1 + num_workers: 0 + 
class CallbackBuilder:
    """
    Builds the callback that combines a Landsat QA-based mask with an
    NDVI-threshold mask into a single raster.
    """

    def __init__(self, ndvi_threshold: float):
        # Create temporary directory to store our new data, which will be transfered to our storage
        # automatically when the op is run in a workflow
        self.tmp_dir = TemporaryDirectory()
        # Define the parameters
        self.ndvi_threshold = ndvi_threshold

    def __call__(self):
        def callback(landsat_raster: LandsatRaster, ndvi_raster: Raster) -> Dict[str, Raster]:
            # Get QA band from the Landsat raster
            landsat_data = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)
            qa_pixel = landsat_data[landsat_raster.bands["qa_pixel"]].astype(np.uint16)

            # Pixels with the selected QA bit set become 1, all others NaN.
            # NOTE(review): the constant is named after "dilated cloud", yet set
            # bits are *kept*; bit 6 of Collection 2 QA_PIXEL is believed to be
            # the "clear" flag — confirm against the USGS documentation
            qa_bit_set = (qa_pixel & (1 << CLOUD_DILATED_CLOUD_BIT)) > 0
            cloud_mask = xr.where(qa_bit_set, 1, np.nan)

            # Retrieve ndvi layer
            ndvi = load_raster_match(ndvi_raster, landsat_raster)[0]

            # Pixels above the NDVI threshold become 1, all others
            # (assumed to be water bodies) NaN
            ndvi_mask = xr.where(ndvi > self.ndvi_threshold, 1, np.nan)

            # NaN in either mask propagates through the product
            cloud_water_mask = cloud_mask * ndvi_mask

            # Save final mask
            mask_path = os.path.join(self.tmp_dir.name, "cloud_water_mask.tif")
            cloud_water_mask.rio.to_raster(mask_path)
            mask_asset = AssetVibe(reference=mask_path, type="image/tiff", id=gen_guid())

            mask_raster = Raster.clone_from(
                landsat_raster,
                id=gen_guid(),
                assets=[mask_asset],
                bands={"cloud_water_mask": 0},
            )
            return {"cloud_water_mask": mask_raster}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
def post_process(_: NDArray[Any], __: NDArray[Any], model_out: NDArray[Any]) -> NDArray[Any]:
    """Turn per-class model scores into class indices.

    The first two positional arguments are accepted only to satisfy the
    post-processing callback signature and are ignored.

    :param model_out: Model output with the class scores along axis 1.
    :return: Index of the highest-scoring class along axis 1; the axis is kept
        with length 1.
    """
    return np.argmax(model_out, axis=1, keepdims=True)
raise ValueError( + f"Downsampling must be equal or larger than 1, found {self.downsampling}" + ) + model_path = os.path.join(self.root_dir, self.model_path) + model = ort.InferenceSession(model_path) + chip_size = self.window_size + step_size = int(chip_size * (1 - self.overlap)) + + dataset = StackOnChannelsChipDataset( + [[elevation_gradient], [average_elevation]], + chip_size=Dims(chip_size, chip_size, 1), + step_size=Dims(step_size, step_size, 1), + downsampling=self.downsampling, + nodata=DEFAULT_NODATA, + ) + + dataloader = get_loader(dataset, self.batch_size, self.num_workers) + + pred_filepaths = predict_chips( + model, + dataloader, + self.tmp_dir.name, + skip_nodata=False, + post_process=post_process, + ) + assert ( + len(pred_filepaths) == 1 + ), f"Expected one prediction file, found: {len(pred_filepaths)}" + out_filepath = resample_raster( + pred_filepaths[0], + self.tmp_dir.name, + dataset.width, + dataset.height, + dataset.transform, + Resampling.nearest, + ) + asset = AssetVibe(reference=out_filepath, type="image/tiff", id=gen_guid()) + pred = Raster.clone_from(elevation_gradient, id=gen_guid(), assets=[asset]) + + return {"output_raster": pred} + + return compute_conservation_practice + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_conservation_practice/compute_conservation_practice.yaml b/ops/compute_conservation_practice/compute_conservation_practice.yaml new file mode 100644 index 00000000..35ecafa1 --- /dev/null +++ b/ops/compute_conservation_practice/compute_conservation_practice.yaml @@ -0,0 +1,30 @@ +# Compute terraces and grassed waterways classes on pixel level +name: compute_conservation_practice +inputs: + elevation_gradient: Raster + average_elevation: Raster +output: + output_raster: Raster +parameters: + downsampling: 2 + root_dir: /opt/terravibes/ops/resources/conservation_practices_models + model_path: terraces_grassed_waterways.onnx + window_size: 512 + overlap: .25 + batch_size: 1 + num_workers: 4 
+entrypoint: + file: compute_conservation_practice.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - model_path + - downsampling + - root_dir + - model_path + - window_size + - overlap + - batch_size + - num_workers +description: + short_description: Classifies pixels in either terraces or grassed waterways using a CNN model. \ No newline at end of file diff --git a/ops/compute_evaporative_fraction/compute_evaporative_fraction.py b/ops/compute_evaporative_fraction/compute_evaporative_fraction.py new file mode 100644 index 00000000..0d74d261 --- /dev/null +++ b/ops/compute_evaporative_fraction/compute_evaporative_fraction.py @@ -0,0 +1,158 @@ +import os +from tempfile import TemporaryDirectory +from typing import Any, Dict, cast + +import numpy as np +import xarray as xr +from numpy.typing import NDArray +from scipy import ndimage + +from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid +from vibe_lib.raster import load_raster, load_raster_match + +# DEFINE CONSTANTS +# source: Senay et. 
class CallbackBuilder:
    """Build the op callback that computes an evaporative-fraction raster.

    The callback combines a Landsat thermal band, a DEM, an NDVI raster and a
    cloud/water mask, selects "hot" and "cold" reference pixels from percentile
    bands, and scales an NDVI-derived factor by the normalised temperature
    difference. It relies on the module constants ``K1``, ``K2``, ``LP`` and
    ``PIXEL_SIZE_THRESHOLD``.

    :param ndvi_hot_threshold: NDVI values must be strictly greater than this
        to be considered during hot-pixel selection.
    """

    def __init__(self, ndvi_hot_threshold: float):
        self.tmp_dir = TemporaryDirectory()
        self.ndvi_hot_threshold = ndvi_hot_threshold

    def __call__(self):
        def keep_between(values: Any, lower: Any, upper: Any) -> NDArray[Any]:
            # Keep values strictly inside (lower, upper); the rest become NaN.
            above = np.where(values > lower, values, np.nan)
            return np.where(above < upper, above, np.nan)

        def positive_as_one(values: Any) -> NDArray[Any]:
            # 1 where the value is strictly positive, NaN elsewhere.
            return np.where(values > 0, 1, np.nan)

        def drop_small_regions(pixels: NDArray[Any]) -> NDArray[Any]:
            # Zero out connected regions whose pixel count does not exceed
            # PIXEL_SIZE_THRESHOLD (in place), then turn non-positive entries
            # into NaN.
            binary = (pixels > 0).astype(int)
            labels, _ = ndimage.label(binary)  # type: ignore
            region_sizes = np.bincount(labels.ravel())
            large_enough = region_sizes > PIXEL_SIZE_THRESHOLD
            pixels[~large_enough[labels]] = 0  # type: ignore
            return np.where(pixels > 0, pixels, np.nan)

        def calculate_hot_pixels(
            lst_elev_m: xr.DataArray, ndvi_hot_mask: NDArray[Any]
        ) -> NDArray[Any]:
            # Temperatures between the 90th and 95th percentile...
            lst_hot = keep_between(
                lst_elev_m,
                np.nanpercentile(lst_elev_m, 90),
                np.nanpercentile(lst_elev_m, 95),
            )
            # ...restricted to where the NDVI mask exceeds the hot threshold.
            above_threshold = np.where(
                ndvi_hot_mask > self.ndvi_hot_threshold, ndvi_hot_mask, np.nan
            )
            return lst_hot * positive_as_one(above_threshold)

        def calculate_cold_pixels(
            lst_elev_m: xr.DataArray, ndvi_cold_mask: NDArray[Any]
        ) -> NDArray[Any]:
            # Temperatures between the 2nd and 4th percentile, restricted to
            # the positive entries of the NDVI cold mask.
            lst_cold = keep_between(
                lst_elev_m,
                np.nanpercentile(lst_elev_m, 2),
                np.nanpercentile(lst_elev_m, 4),
            )
            return lst_cold * positive_as_one(ndvi_cold_mask)

        def calculate_evap_frxn(
            etrf: xr.DataArray, lst: xr.DataArray, hot_pixel_value: float, cold_pixel_value: float
        ) -> NDArray[Any]:
            # Normalised distance of each pixel from the hot reference, scaled
            # by etrf; negative results are clamped to 0.
            etf = (hot_pixel_value - lst) / (hot_pixel_value - cold_pixel_value)
            scaled = etrf * etf
            return np.where(scaled < 0, 0, scaled)

        def main_processing(
            landsat_raster: LandsatRaster,
            dem_raster: Raster,
            ndvi_raster: Raster,
            cloud_water_mask_raster: Raster,
        ) -> xr.DataArray:
            # NOTE(review): 0.00341802 / 149 look like a scale and offset for
            # the thermal band -- confirm against the product specification.
            lst = load_raster(landsat_raster, bands=["lwir11"])[0] * 0.00341802 + 149

            dem = load_raster_match(dem_raster, landsat_raster)[0]
            ndvi = load_raster_match(ndvi_raster, landsat_raster)[0]

            # NOTE(review): 0.0065 * dem is presumably an elevation
            # (lapse-rate) correction -- confirm.
            lst_elev = lst + (0.0065 * dem)
            cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0]

            # Apply the cloud/water mask to both layers.
            lst_elev_m = lst_elev * cloud_water_mask
            ndvi_m = ndvi * cloud_water_mask

            ndvi_p01 = np.nanpercentile(ndvi_m, 1)
            ndvi_p90 = np.nanpercentile(ndvi_m, 90)
            ndvi_p95 = np.nanpercentile(ndvi_m, 95)

            # Hot candidates: NDVI below its 1st percentile but above the
            # configured threshold.
            lowest_ndvi = np.where(ndvi_m < ndvi_p01, ndvi_m, np.nan)
            ndvi_hot_mask = positive_as_one(
                np.where(lowest_ndvi > self.ndvi_hot_threshold, lowest_ndvi, np.nan)
            )
            # Cold candidates: NDVI between its 90th and 95th percentile.
            ndvi_cold_mask = positive_as_one(keep_between(ndvi_m, ndvi_p90, ndvi_p95))

            hot_pixels = calculate_hot_pixels(lst_elev_m, ndvi_hot_mask)
            cold_pixels = calculate_cold_pixels(lst_elev_m, ndvi_cold_mask)

            # Hot reference: median over the sufficiently large hot regions.
            hot_pixel_value = cast(float, np.nanmedian(drop_small_regions(hot_pixels)))
            # Cold reference: minimum over the sufficiently large cold regions.
            cold_pixel_value = cast(float, np.nanmin(drop_small_regions(cold_pixels)))

            etrf = ndvi * K1 / K2 + LP

            evap_frxn = calculate_evap_frxn(etrf, lst, hot_pixel_value, cold_pixel_value)

            # Re-attach the spatial metadata of the mask raster.
            return xr.DataArray(
                evap_frxn, dims=cloud_water_mask.dims, coords=cloud_water_mask.coords
            )

        def callback(
            landsat_raster: LandsatRaster,
            dem_raster: Raster,
            ndvi_raster: Raster,
            cloud_water_mask_raster: Raster,
        ) -> Dict[str, Raster]:
            result = main_processing(
                landsat_raster, dem_raster, ndvi_raster, cloud_water_mask_raster
            )

            out_path = os.path.join(self.tmp_dir.name, "evaporative_fraction.tif")
            result.rio.to_raster(out_path)
            out_asset = AssetVibe(reference=out_path, type="image/tiff", id=gen_guid())

            out_raster = Raster.clone_from(
                landsat_raster,
                id=gen_guid(),
                assets=[out_asset],
                bands={"evaporative_fraction": 0},
            )
            return {"evaporative_fraction": out_raster}

        return callback

    def __del__(self):
        # Remove the temporary directory together with the builder.
        self.tmp_dir.cleanup()
2013, 49, 577–591. + The land surface elevation data source are 10m USGS DEM, and 30m Copernicus DEM; but Copernicus DEM is set as default + source in the workflow. + parameters: + ndvi_hot_threshold: Pixels with ndvi values of this threshold will not be considered for hot pixel selection. diff --git a/ops/compute_fcover/compute_fcover.yaml b/ops/compute_fcover/compute_fcover.yaml new file mode 100644 index 00000000..992d11ae --- /dev/null +++ b/ops/compute_fcover/compute_fcover.yaml @@ -0,0 +1,10 @@ +name: compute_fcover +inputs: + raster: Raster + angles: Raster +output: + fcover: Raster +parameters: +entrypoint: + file: fcover.py + callback_builder: CallbackBuilder diff --git a/ops/compute_fcover/fcover.py b/ops/compute_fcover/fcover.py new file mode 100644 index 00000000..7618b018 --- /dev/null +++ b/ops/compute_fcover/fcover.py @@ -0,0 +1,225 @@ +""" +FCOVER computation using a neural network as described in +https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf +https://github.com/senbox-org/s2tbx/blob/master/s2tbx-biophysical/src/main/java/org/esa/s2tbx/biophysical +https://www.sciencedirect.com/science/article/pii/S0034425710002853 +https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/fcover/ + +Following implementation from Sentinel-2 Toolbox +https://github.com/senbox-org/s2tbx/blob/master/s2tbx-biophysical/src/main/java/org/esa/s2tbx/biophysical/BiophysicalOp.java + +Normalization params and weights from Sentinel-2 Toolbox for L2A +https://github.com/senbox-org/s2tbx/tree/master/s2tbx-biophysical/src/main/resources/auxdata/3_0/S2A/FCOVER +""" + +from tempfile import TemporaryDirectory +from typing import Any, Dict, cast, overload + +import numpy as np +import xarray as xr +from numpy.typing import NDArray +from rasterio.warp import Resampling + +from vibe_core.data import Raster, gen_guid +from vibe_lib.raster import get_cmap, json_to_asset, load_raster, save_raster_to_asset + +BANDS = ["B03", "B04", "B05", "B06", "B07", "B8A", "B11", 
"B12"] + +# Normalization params: min - max for each band +BANDS_NORM = np.array( + ( + (0, 0.23901527463861838), + (0, 0.29172736471507876), + (0, 0.32652671459255694), + (0.008717364330310326, 0.5938903910368211), + (0.019693160430621366, 0.7466909927207045), + (0.026217828282102625, 0.7582393779705984), + (0.018931934894415213, 0.4929337190581187), + (0, 0.4877499217101771), + ) +) +ANGLES_NORM = np.array( + ( + (0.979624800125421, 0.9999999999691099), + (0.342108564072183, 0.9274847491748729), + (-0.9999999986740542, 0.9999999998869543), + ) +) + +DENORMALIZATION = np.array((0.0001143371095669865, 0.9994883064311412)) + +# NN Weights +# Layer 1: 5 hidden neurons +# 5 x 11 matrix +W1 = np.array( + ( + ( + -0.09299549787532572, + 0.03711751310275837, + 0.35917948087916934, + -2.0327599053936245, + -0.3004739931440174, + 5.081364269387806, + -0.5509229514856009, + -1.8459014400791363, + 0.04210879716286216, + -0.1433820536680042, + -0.0919637992244123, + ), + ( + 0.17782538722557306, + -0.3793824396587722, + -0.18316058499587165, + -0.8546862528226032, + -0.07553090207841909, + 2.1968612305059834, + -0.1734580018542482, + -0.89158072360678, + 0.017977829778812265, + 0.19161704265110313, + -0.020341567456493917, + ), + ( + -0.8964833683739212, + -0.6038768961220443, + -0.5995953059405849, + -0.15212446911598965, + 0.3889544003539062, + 1.9871015442471918, + -0.9746781245763875, + -0.28459612830995773, + -0.7195016395928718, + 0.4628341672035696, + 1.652035259226453, + ), + ( + -0.15296262636768043, + 0.17628558201043018, + 0.11212126329600514, + 1.5711153194443364, + 0.5209619736717268, + -3.068192837466073, + 0.1483332044127799, + 1.2331177561153577, + -0.02091226761957991, + -0.23041694611129848, + 0.0031568086031440803, + ), + ( + 1.7234228895153363, + -2.906528582039084, + -1.3938598383149996, + -1.6262956756929428, + 0.3326361580291295, + -0.8862583674506147, + -0.2185426118098439, + 0.5660635905206617, + -0.09949171171933309, + -0.35271418843339297, + 
def fcover_fun(raster: xr.DataArray, angles: xr.DataArray) -> xr.DataArray:
    """Estimate FCOVER with the two-layer network defined by W1/B1/W2/B2.

    The band data and three angle-derived features are normalised, stacked
    along the ``band`` dimension and passed through a hidden layer with
    ``tanh`` activation followed by a single linear output neuron. The output
    is then mapped back with ``denormalize``.

    :param raster: Band data, one entry per row of ``BANDS_NORM`` along the
        first dimension.
    :param angles: Angle raster with at least four bands. NOTE(review):
        judging by the local names, bands 0 and 2 are zenith angles and bands
        1 and 3 azimuth angles, all in degrees -- confirm against the producer
        of this raster.
    :return: A single-band array carrying the metadata of ``raster[:1]``.
    """
    # Normalize bands
    norm_bands = normalize(raster, BANDS_NORM[:, :1, None], BANDS_NORM[:, 1:, None])
    # Normalize angles before upsampling
    zen_norm = normalize(
        cast(xr.DataArray, np.cos(np.deg2rad(angles[[0, 2]]))),
        ANGLES_NORM[:2, :1, None],
        ANGLES_NORM[:2, 1:, None],
    )
    rel_az_norm = cast(
        xr.DataArray,
        normalize(
            np.cos(np.deg2rad(angles[3] - angles[1])),
            ANGLES_NORM[2, :1, None],
            ANGLES_NORM[2, 1:, None],
        ),
    ).expand_dims("band")
    norm_angles = xr.concat((zen_norm, rel_az_norm), dim="band")
    # Upsample angles to the same resolution as the band data
    norm_angles = norm_angles.rio.reproject_match(norm_bands, resampling=Resampling.bilinear)
    full_data = xr.concat((norm_bands, norm_angles), dim="band").to_numpy()
    # Hidden layer: the feature axis is moved to second-to-last so that
    # ``W1.dot`` contracts over it; tanh activation.
    layer1 = np.tanh(W1.dot(full_data.transpose((1, 0, 2))) + B1[:, None, None])
    # Output layer: linear. The reference network that the weights come from
    # (S2 Toolbox biophysical processor) applies no activation to the output
    # neuron; wrapping it in tanh compresses the result before it is
    # denormalised and biases the FCOVER estimate.
    layer2 = W2.dot(layer1.transpose(1, 0, 2)) + B2
    fcover = denormalize(layer2, DENORMALIZATION[0], DENORMALIZATION[1])[None]
    fcover = raster[:1].copy(data=fcover)  # Copy metadata
    return fcover
def normalize(unnormalized: Any, min: NDArray[Any], max: NDArray[Any]):
    """Linearly rescale values so that ``min`` maps to -1 and ``max`` to 1.

    :param unnormalized: Values to rescale.
    :param min: Lower reference value(s), broadcast against ``unnormalized``.
    :param max: Upper reference value(s), broadcast against ``unnormalized``.
    :return: ``2 * (unnormalized - min) / (max - min) - 1``.
    """
    value_range = np.subtract(max, min)
    return 2 * (unnormalized - min) / value_range - 1
# Fraction of gasoline in the blended fuel when no country-specific value is
# available.
WORLD_GASOLINE_MIXTURE = 1 - 0.1
# Fraction of gasoline in the blended fuel per country, written as
# ``1 - <non-gasoline share>``.
GASOLINE_MIXTURES = {  # % gasoline
    "Argentina": 1 - 0.05,
    "Australia": 1 - 0.1,
    "Brazil": 1 - 0.27,
    "Canada": 1 - 0.05,
    "China": 1 - 0.1,
    "Colombia": 1 - 0.1,
    "Costa Rica": 1 - 0.07,
    "India": 1 - 0.2,
    # Key was misspelled "Jamica", so a lookup by the real country name missed.
    "Jamaica": 1 - 0.1,
    "Malawi": 1 - 0.1,
    # NOTE(review): 0.6 is far above every other share in this table
    # (the next highest is 0.27) -- possibly meant 0.06; confirm against the
    # source data before changing.
    "Mexico": 1 - 0.6,
    "New Zealand": 1 - 0.1,
    "Pakistan": 1 - 0.1,
    "Paraguay": 1 - 0.24,
    "Peru": 1 - 0.08,
    "Philippines": 1 - 0.1,
    "Thailand": 1 - 0.2,
    "Vietnam": 1 - 0.05,
    "Austria": 1 - 0.1,
    "Denmark": 1 - 0.05,
    "Finland": 1 - 0.1,
    "France": 1 - 0.1,
    "Germany": 1 - 0.1,
    "Ireland": 1 - 0.04,
    "Netherlands": 1 - 0.15,
    "Romania": 1 - 0.04,
    "Sweden": 1 - 0.1,
    "United States of America": 1 - 0.1,
    "World": WORLD_GASOLINE_MIXTURE,
}
leached +C_N2O_VOLATILIZATION = 0.02 # kg N2O/kg N applied +N2O_RESIDUE = 0.20 # Ratio +N2O_ATMOSPHERIC_VOLATIZATION_RATE = 0.01 # kg N2O-N/kg N +N2O_SOIL_LOSS = 0.0188571428571429 # N2O tonnes / ha / year +CO2EQ_SOIL_EMISSIONS = 73.3333333333 # CO2eq tonnes / ha -- tropical / subtropical + +FOREST_TO_CROPLAND_CARBON_STOCK = 88.39 # tonnes CO2 / ha -- reference: EPA +# https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references + +HIGH_CLAY_CONTENT_EMISSION_FACTOR = 0.16 # tonnes CO2 / ha year +LOW_CLAY_CONTENT_EMISSION_FACTOR = 0.92 # tonnes CO2 / ha year +CLAY_CONTENT_THRESHOLD = 0.6 +FOREST_STR = "forest" + +RESIDUES = { + CropType.SOYBEAN: EmissionFactor(0.000243624857142857, "kg N2O/kg product"), + CropType.CORN: EmissionFactor(0.000162963428571429, "kg N2O/kg product"), + CropType.BEANS: EmissionFactor(0.000346297285714286, "kg N2O/kg product"), + CropType.RICE: EmissionFactor(0.00011484, "kg N2O/kg product"), + CropType.WHEAT: EmissionFactor(0.000177728571428571, "kg N2O/kg product"), + CropType.SUGARCANE: EmissionFactor(0.0000170657142857143, "kg N2O/kg product"), + CropType.SUGARCANE_WITH_BURNING: EmissionFactor(0.00000341314285714286, "kg N2O/kg product"), + CropType.COTTON: EmissionFactor(0.000361428571428571, "kg N2O/kg product"), + CropType.GREEN_MANURE_LEGUMES: EmissionFactor(0.000382380952380952, "kg N2O/kg product"), + CropType.GREEN_MANURE_GRASSES: EmissionFactor(0.000158015873015873, "kg N2O/kg product"), + CropType.GREEN_MANURE: EmissionFactor(0.000247761904761905, "kg N2O/kg product"), +} + +ENERGY_FACTORS_BY_COUNTRY = { # {{{ + "Albania": 0.003095364, + "Algeria": 0.159542831, + "Angola": 0.065773567, + "Argentina": 0.098421175, + "Armenia": 0.029916277, + "Australia": 0.236261887, + "Austria": 0.045215264, + "Azerbaijan": 0.12282867, + "Bahrain": 0.184169045, + "Bangladesh": 0.162124582, + "Belarus": 0.083745465, + "Belgium": 0.060356361, + "Benin": 0.200827188, + "Bolivia": 0.108945513, + "Bosnia & 
Herzegovina": 0.214942416, + "Brazil": 0.017763401, + "Brunei Darussalam": 0.209197436, + "Bulgaria": 0.128374791, + "Cameroon": 0.067222554, + "Canada": 0.046323264, + "Chile": 0.10327114, + "China": 0.205740504, + "Chinese Taipei": 0.175834586, + "Colombia": 0.048550178, + "Congo Dem. Rep.": 0.000812663, + "Costa Rica": 0.011027675, + "Cote d'Ivoire": 0.118112301, + "Croatia": 0.078498393, + "Cuba": 0.208176358, + "Cyprus": 0.206163455, + "Czech Republic": 0.142448081, + "Denmark": 0.083861473, + "Dominican Republic": 0.163578361, + "Ecuador": 0.080231471, + "Egypt": 0.128953112, + "El Salvador": 0.088302753, + "Eritrea": 0.186029433, + "Estonia": 0.194967281, + "Ethiopia": 0.032771621, + "Finland": 0.056897185, + "France": 0.024888727, + "Gabon": 0.089232088, + "Georgia": 0.035633834, + "Germany": 0.119247392, + "Ghana": 0.051669336, + "Greece": 0.200104523, + "Guatemala": 0.096702916, + "Haiti": 0.15155357, + "Honduras": 0.095377471, + "Hong Kong (China)": 0.211343355, + "Hungary": 0.08367062, + "Iceland": 0.000117448, + "India": 0.263539434, + "Indonesia": 0.206556241, + "Iran Islamic Rep.": 0.174499668, + "Iraq": 0.18949185, + "Ireland": 0.128871203, + "Israel": 0.192482591, + "Italy": 0.107035847, + "Jamaica": 0.150815254, + "Japan": 0.114874393, + "Jordan": 0.160811796, + "Kazakhstan": 0.132976177, + "Kenya": 0.109399904, + "Korea, Dem Rep. of": 0.138185411, + "Korea, Rep. 
of": 0.137862623, + "Kuwait": 0.24088064, + "Kyrgyzstan": 0.02242761, + "Latvia": 0.042401553, + "Lebanon": 0.198518255, + "Libya": 0.241484223, + "Lithuania": 0.030789353, + "Luxembourg": 0.106447222, + "Malaysia": 0.179675413, + "Malta": 0.235565038, + "Mexico": 0.126030291, + "Moldova": 0.110827257, + "Morocco": 0.176849154, + "Mozambique": 0.000139414, + "Myanmar": 0.054249536, + "Namibia": 0.06562166, + "Nepal": 0.001203039, + "Netherlands": 0.103732345, + "Netherlands Antilles": 0.195810829, + "New Zealand": 0.046121331, + "Nicaragua": 0.140110007, + "Nigeria": 0.115158373, + "Norway": 0.004788776, + "Oman": 0.233369148, + "Pakistan": 0.126789742, + "Panama": 0.08370217, + "Paraguay": 0, + "Peru": 0.065456928, + "Philippines": 0.132450182, + "Poland": 0.177334846, + "Portugal": 0.102001926, + "Qatar": 0.136840465, + "Romania": 0.114776003, + "Russia": 0.087920908, + "Saudi Arabia": 0.209753403, + "Senegal": 0.170202733, + "Serbia & Montenegro": 0.188450468, + "Singapore": 0.143723334, + "Slovak Republic": 0.061415332, + "Slovenia": 0.087538925, + "South Africa": 0.256475657, + "Spain": 0.082763168, + "Sri Lanka": 0.127450138, + "Sudan": 0.098500037, + "Sweden": 0.011948395, + "Switzerland": 0.011061718, + "Syria": 0.177526918, + "Tajikistan": 0.008095713, + "Tanzania United Rep.": 0.077898522, + "Thailand": 0.142206509, + "The former Yugoslav Republic of Macedonia": 0.196643297, + "Togo": 0.055835278, + "Trinidad & Tobago": 0.19910965, + "Tunisia": 0.149048022, + "Turkey": 0.132940333, + "Turkmenistan": 0.218678315, + "Ukraine": 0.103585535, + "United Arab Emirates": 0.174855004, + "United Kingdom": 0.124511777, + "United States": 0.14076309, + "Uruguay": 0.070130528, + "Uzbekistan": 0.127828824, + "Venezuela": 0.055011591, + "Vietnam": 0.106396642, + "Yemen": 0.174635675, + "Zambia": 0.000899308, + "Zimbabwe": 0.171449399, + "Africa": 0.178111, + "Asia": 0.206365, + "Central and Eastern Europe": 0.093903, + "China (including Hong Kong)": 0.205811, + "Former 
@dataclass
class Emissions:
    """Amounts of CO2, N2O and CH4 attributed to one source and scope.

    Instances can be added together and scaled by a number on either side
    (``2 * e`` and ``e * 2``).
    """

    scope: Scope
    source: str
    co2: float = 0.0
    n2o: float = 0.0
    ch4: float = 0.0

    # Un-annotated on purpose: these are shared class attributes, not
    # dataclass fields. ``factor`` is the CO2-equivalence multiplier per gas.
    CO2_CO2EQ = GHG("CO2", "Carbon dioxide", 1.0)
    N2O_CO2EQ = GHG("N2O", "Nitrous oxide", 298.0)
    CH4_CO2EQ = GHG("CH4", "Methane", 25.0)

    @property
    def total(self) -> float:
        """Return the combined emissions expressed as CO2 equivalent."""
        return self.co2 + self.n2o * self.N2O_CO2EQ.factor + self.ch4 * self.CH4_CO2EQ.factor

    def __add__(self, other: "Emissions") -> "Emissions":
        """Return the gas-wise sum of two emissions.

        The result keeps the scope of the left operand; the sources are
        joined with ``" / "``.
        """
        return Emissions(
            scope=self.scope,
            source=self.source + " / " + other.source,
            co2=self.co2 + other.co2,
            n2o=self.n2o + other.n2o,
            ch4=self.ch4 + other.ch4,
        )

    def __rmul__(self, scalar: float) -> "Emissions":
        """Return a copy with every gas amount multiplied by ``scalar``."""
        return Emissions(
            scope=self.scope,
            source=self.source,
            co2=self.co2 * scalar,
            n2o=self.n2o * scalar,
            ch4=self.ch4 * scalar,
        )

    def __mul__(self, scalar: float) -> "Emissions":
        """Support ``emissions * scalar`` with the same result as ``scalar * emissions``."""
        return self.__rmul__(scalar)
Scope.SCOPE_1, "Ethanol anhydrous", co2=0.001526, n2o=0.0, ch4=0.0 +) +FUEL_EMISSION_FACTORS[FuelType.ETHANOL_HYDRATED] = Emissions( + Scope.SCOPE_1, "Ethanol hydrated", co2=0.001457, n2o=0.0, ch4=0.0 +) +FUEL_EMISSION_FACTORS[FuelType.BIODIESEL] = Emissions( + Scope.SCOPE_1, "Biodiesel", co2=0.002499, n2o=0.0, ch4=0.0 +) + +BURNING_EMISSION_FACTORS = { + CropType.BEANS: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Beans)", + co2=GHG_CONVERSION["CO_CO2"] * 0.0734272, + n2o=0.000288464 + GHG_CONVERSION["NOX_N2O"] * 0.0104259131428571, + ch4=0.00349653333333333, + ), + CropType.CORN: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Corn)", + co2=GHG_CONVERSION["CO_CO2"] * 0.078583792, + n2o=0.000123488816 + GHG_CONVERSION["NOX_N2O"] * 0.00446323863542857, + ch4=0.00374208533333333, + ), + CropType.COTTON: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Cotton)", + co2=GHG_CONVERSION["CO_CO2"] * 0.10773, + n2o=0.000355509 + GHG_CONVERSION["NOX_N2O"] * 0.012849111, + ch4=0.00513, + ), + CropType.RICE: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Rice)", + co2=GHG_CONVERSION["CO_CO2"] * 0.04873344, + n2o=0.000053606784 + GHG_CONVERSION["NOX_N2O"] * 0.001937502336, + ch4=0.00232064, + ), + CropType.SOYBEAN: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Soybeans)", + co2=GHG_CONVERSION["CO_CO2"] * 0.0975744, + n2o=0.000383328 + GHG_CONVERSION["NOX_N2O"] * 0.0138545691428571, + ch4=0.0046464, + ), + CropType.SUGARCANE: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Sugarcane)", + co2=GHG_CONVERSION["CO_CO2"] * 0.00793636844, + n2o=0.0000186425657631827 + GHG_CONVERSION["NOX_N2O"] * 0.000673795591155031, + ch4=0.000377922306666667, + ), + CropType.WHEAT: Emissions( + Scope.SCOPE_1, + "Biomass Burning (Wheat)", + co2=GHG_CONVERSION["CO_CO2"] * 0.058212, + n2o=0.0000548856 + GHG_CONVERSION["NOX_N2O"] * 0.0019837224, + ch4=0.002772, + ), +} + +GREEN_MANURE_CAPTURE_FACTOR = -1.835 # tonnes of CO2 per hectare + + +def geometry_to_country_name( + polygon: Union[ + 
shpg.Polygon, + shpg.MultiPolygon, + shpg.Point, + shpg.LineString, + shpg.LinearRing, + shpg.MultiLineString, + shpg.GeometryCollection, + ], +) -> str: + # Use geopandas "naturalearth_lowres" dataset + df = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) # type: ignore + df = df[df.geometry.intersects(polygon)] + + assert df is not None, "There is not intersection between the geometry, and any country" + if len(df) == 0: + return "World" + return df.iloc[0]["name"] + + +def get_land_use_change_factor( + previous_land_use: PreviousLandUse, + current_land_use: CurrentLandUse, + biome: Biome, + high_clay_content: bool, +): + if previous_land_use.name == current_land_use.name: + return 0.0 + if previous_land_use == PreviousLandUse.DIRECT_SEEDING: + if current_land_use == CurrentLandUse.CONVENTIONAL_CROPS: + return 0.9167 + elif previous_land_use == PreviousLandUse.CONVENTIONAL_CROPS: + if current_land_use == CurrentLandUse.SUGARCANE_WITH_BURNING: + return -2.09 + elif current_land_use == CurrentLandUse.DIRECT_SEEDING: + return -1.52 + elif previous_land_use == PreviousLandUse.NATIVE: + if current_land_use == CurrentLandUse.CONVENTIONAL_CROPS and high_clay_content: + return 0.1613 + elif current_land_use == CurrentLandUse.CONVENTIONAL_CROPS and not high_clay_content: + return 0.9167 + elif current_land_use == CurrentLandUse.SUGARCANE_WITH_BURNING: + return 3.1203 + elif current_land_use == CurrentLandUse.DIRECT_SEEDING: + if biome == Biome.BRAZIL_CERRADO: + return -0.44 + elif biome == Biome.BRAZIL_AMAZON_SAVANNA or biome == Biome.BRAZIL_AMAZON_FOREST: + return 0.88 + return 0.0 # we don't know what this is, so we return 0 + + +class CropEmission: + """General calculation method for emissions from a crop type. 
+ + Computation should be correct for the following crops: + - wheat + - corn + - cotton + - soybeans + + :param crop_type: Crop type + :param cultivation_area: Cultivation area in hectares + """ + + def __init__(self, crop_type: CropType, cultivation_area: float): + self.cultivation_area = cultivation_area / 1000.0 + self.crop_type = crop_type + + if crop_type not in [ + CropType.WHEAT, + CropType.CORN, + CropType.COTTON, + CropType.SOYBEAN, + ]: + raise ValueError("Crop type not supported") + + def fuel_emissions( + self, + fuel_consumptions: List[Tuple[FuelType, float]], + scope: Scope = Scope.SCOPE_1, + desc: str = "", + gasoline_mixture: float = WORLD_GASOLINE_MIXTURE, + ) -> Emissions: + emissions = Emissions(scope, desc) + for fuel_type, fuel_consumption in fuel_consumptions: + tmp = copy(FUEL_EMISSION_FACTORS[fuel_type]) + tmp.scope = scope + emissions += fuel_consumption * tmp + if "DIESEL" in fuel_type.name: + emissions += ( + fuel_consumption + * (1 - FUEL_COMPOSITION[fuel_type]) + * FUEL_EMISSION_FACTORS[FuelType.BIODIESEL] + ) + elif "GASOLINE" in fuel_type.name: + emissions += ( + fuel_consumption + * (1 - gasoline_mixture) + * FUEL_EMISSION_FACTORS[FuelType.ETHANOL_ANHYDROUS] + ) + return emissions + + def biomass_burning_emissions( + self, average_yield: float, burn_area: float, scope: Scope = Scope.SCOPE_1 + ) -> Emissions: + tmp = copy(BURNING_EMISSION_FACTORS[self.crop_type]) + tmp.scope = scope + return average_yield * burn_area * tmp + + def initial_carbon_stock(self, biome: str = "", previous_land_use: str = "") -> Emissions: + if biome.upper() not in Biome.__members__ or "native" not in previous_land_use.lower(): + return Emissions(Scope.SCOPE_1, "Initial carbon stock") + stock = BIOME_TO_CARBON_STOCK[Biome[biome.upper()]] + return Emissions( + Scope.SCOPE_1, + "Initial carbon stock", + co2=(stock * self.cultivation_area * 1000), + ) + + def carbon_capture( + self, + cultivation_area: float, + green_manure_amount: float = 0.0, + 
green_manure_grass_amount: float = 0.0, + freen_fertilizer_legumes_amount: float = 0.0, + ) -> Emissions: + total_capture = ( + cultivation_area + * GREEN_MANURE_CAPTURE_FACTOR + * any( + ( + green_manure_amount, + green_manure_grass_amount, + freen_fertilizer_legumes_amount, + ) + ) + ) + return Emissions( + Scope.SCOPE_1, + "Carbon captured by Green Manure", + co2=total_capture, + ) + + def land_use_emissions( + self, + biome: str = "", + previous_land_use: str = "", + cultivation_area: float = 0.0, + current_land_use: str = "", + clay_content: float = 0.0, + ) -> Emissions: + try: + previous = PreviousLandUse[previous_land_use.upper()] + except Exception: + for land_use in PreviousLandUse: + if previous_land_use.upper() in land_use.name: + previous = land_use + break + raise ValueError( + f"Previous land use {previous_land_use} not supported. " + f"Supported values: {PreviousLandUse.__members__}" + ) + try: + current = CurrentLandUse[current_land_use.upper()] + except Exception: + for land_use in CurrentLandUse: + if current_land_use.upper() in land_use.name: + current = land_use + break + raise ValueError( + f"Current land use {current_land_use} not supported. 
" + f"Supported values: {CurrentLandUse.__members__}" + ) + return ( + cultivation_area + * get_land_use_change_factor( + previous, + current, + Biome[biome.upper()], + clay_content > CLAY_CONTENT_THRESHOLD, + ) + * Emissions(Scope.SCOPE_1, "Land use change", co2=1.0) + ) + + def fertilizer_emissions( + self, + average_yield: float = 0.0, + urea_amount: float = 0.0, + gypsum_amount: float = 0.0, + limestone_calcite_amount: float = 0.0, + limestone_dolomite_amount: float = 0.0, + synthetic_fertilizer_amount: float = 0.0, + synthetic_fertilizer_nitrogen_ratio: float = 0.0, + manure_amount: float = 0.0, + manure_birds_amount: float = 0.0, + organic_compound_amount: float = 0.0, + organic_other_amount: float = 0.0, + green_manure_amount: float = 0.0, + green_manure_grass_amount: float = 0.0, + green_manure_legumes_amount: float = 0.0, + soil_management_area: float = 0.0, + ) -> Dict[str, Emissions]: + leached_rate = C_N2O_FLOW_RATE * GHG_CONVERSION["N-N2O_N2O"] * C_FRAC_LEACH + return { + "Urea": Emissions( # ✅ + scope=Scope.SCOPE_1, + source="Fertilizer emissions, urea", + co2=FERTILIZER_UREA.co2 * urea_amount * self.cultivation_area, + n2o=FERTILIZER_UREA.n2o + * (urea_amount * FERTILIZER_UREA.nitrogen_ratio) + * self.cultivation_area, + ), + "Liming, gypsum": ( + Emissions( # ✅ + scope=Scope.SCOPE_1, + source="Fertilizer emissions, gypsum", + co2=gypsum_amount * FERTILIZER_GYPSUM.co2 * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Fertilizer emissions, limestone, calcite", + co2=limestone_calcite_amount + * FERTILIZER_LIMESTONE_CALCITE.co2 + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Fertilizer emissions, limestone, dolomite", + co2=limestone_dolomite_amount + * FERTILIZER_LIMESTONE_DOLOMITE.co2 + * self.cultivation_area, + ) + ), + "Synthetic nitrogen fertilizer": Emissions( # ✅ + scope=Scope.SCOPE_1, + source="Fertilizer emissions, synthetic nitrogen fertilizer", + n2o=FERTILIZER_SYNTHETIC.n2o + * 
(synthetic_fertilizer_amount * synthetic_fertilizer_nitrogen_ratio) + * self.cultivation_area, + ), + "Organic fertilizers": ( + Emissions( # ✅ + scope=Scope.SCOPE_1, + source="Fertilizer emissions, manure", + n2o=manure_amount * FERTILIZER_MANURE.n2o * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Fertilizer emissions, bird manure", + n2o=manure_birds_amount * FERTILIZER_MANURE_BIRDS.n2o * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Fertilizer emissions, organic fertilizer", + n2o=organic_compound_amount + * FERTILIZER_ORGANIC_COMPOUND.n2o + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Fertilizer emissions, organic others", + n2o=organic_other_amount + * FERTILIZER_GENERIC_ORGANIC.n2o + * self.cultivation_area, + ) + ), + "Leaching / Surface runoff": ( + Emissions( # ✅ + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, urea", + n2o=(urea_amount * FERTILIZER_UREA.nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, synthetic fertilizer", + n2o=(synthetic_fertilizer_amount * synthetic_fertilizer_nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, organic fertilizer", + n2o=(organic_compound_amount * FERTILIZER_ORGANIC_COMPOUND.nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, manure", + n2o=(manure_amount * FERTILIZER_MANURE.nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, manure, bird", + n2o=(manure_birds_amount * FERTILIZER_MANURE_BIRDS.nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Flow emissions, surface runoff, organic, other", 
+ n2o=(organic_other_amount * FERTILIZER_GENERIC_ORGANIC.nitrogen_ratio) + * leached_rate + * self.cultivation_area, + ) + ), + "Atmospheric emissions, N2O": ( + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Urea", + n2o=urea_amount + * FERTILIZER_UREA.nitrogen_ratio + * C_FRAC_GAS_F + * N2O_ATMOSPHERIC_VOLATIZATION_RATE + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Synthetic nitrogen fertilizer", + n2o=synthetic_fertilizer_amount + * synthetic_fertilizer_nitrogen_ratio + * C_FRAC_GAS_F + * N2O_ATMOSPHERIC_VOLATIZATION_RATE + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Organic fertilizer", + n2o=organic_compound_amount + * FERTILIZER_ORGANIC_COMPOUND.nitrogen_ratio + * C_FRAC_GAS_F + * C_N2O_VOLATILIZATION + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Manure", + n2o=manure_amount + * FERTILIZER_MANURE.nitrogen_ratio + * C_FRAC_GAS_F + * C_N2O_VOLATILIZATION + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Manure, Birds", + n2o=manure_birds_amount + * FERTILIZER_MANURE_BIRDS.nitrogen_ratio + * C_FRAC_GAS_F + * C_N2O_VOLATILIZATION + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + + Emissions( + scope=Scope.SCOPE_1, + source="Atmospheric emissions, N2O, Organic, other", + n2o=organic_other_amount + * FERTILIZER_GENERIC_ORGANIC.nitrogen_ratio + * C_FRAC_GAS_F + * C_N2O_VOLATILIZATION + * GHG_CONVERSION["N-N2O_N2O"] + * self.cultivation_area, + ) + ), + "Residue decomposition": ( + Emissions( + scope=Scope.SCOPE_1, + source="Residue decomposition", + n2o=( + (average_yield * RESIDUES[self.crop_type].value) + + (green_manure_amount / 1000 * 
RESIDUES[CropType.GREEN_MANURE].value) + + ( + green_manure_grass_amount + / 1000 + * RESIDUES[CropType.GREEN_MANURE_GRASSES].value + ) + + ( + green_manure_legumes_amount + / 1000 + * RESIDUES[CropType.GREEN_MANURE_LEGUMES].value + ) + ) + * 10, + ) + ), + "Soil management": ( + Emissions( + scope=Scope.SCOPE_1, + source="Soil management", + co2=soil_management_area * CO2EQ_SOIL_EMISSIONS, + ) + ), + } + + +class CallbackBuilder: + def __init__( + self, + crop_type: str, + ): + if crop_type.upper() not in CropType.__members__: + raise ValueError(f"Unsupported crop type: {crop_type}") + self.crop_type = CropType[crop_type.upper()] + + def __call__(self): + def emissions_callback(ghg: GHGProtocolVibe) -> Dict[str, List[GHGFlux]]: + geometry = shpg.shape(ghg.geometry) + country_name = geometry_to_country_name(geometry) # type: ignore + gasoline_mixture = GASOLINE_MIXTURES.get(country_name, GASOLINE_MIXTURES["World"]) + + if ghg.cultivation_area: + area_ha = ghg.cultivation_area + else: + geod = Geod(ellps="WGS84") + area = abs(geod.geometry_area_perimeter(geometry)[0]) # in m^2 + area_ha = area / 10000 # in ha + + fuel_consumptions = [] + if ghg.diesel_amount != 0: + if ghg.diesel_type is None: + raise ValueError("Diesel amount is not zero, but diesel type is not specified") + fuel_consumptions.append( + ( + FuelType[ghg.diesel_type.upper()], + ghg.diesel_amount, + ) + ) + if ghg.gasoline_amount != 0: + fuel_consumptions.append( + ( + FuelType.GASOLINE, + ghg.gasoline_amount * gasoline_mixture if ghg.gasoline_amount else 0.0, + # The above can be done because all equations are linear + ) + ) + + if not ghg.total_yield: + raise ValueError("Total yield is not specified") + + crop_emission = CropEmission(self.crop_type, area_ha) + internal_operations_emissions = crop_emission.fuel_emissions( + fuel_consumptions, + Scope.SCOPE_1, + "Internal operations", + gasoline_mixture, + ) + transport_emissions = crop_emission.fuel_emissions( + [ + ( + FuelType[ + 
ghg.transport_diesel_type.upper() + if ghg.transport_diesel_type + else "DIESEL" + ], + ghg.transport_diesel_amount if ghg.transport_diesel_amount else 0.0, + ) + ], + Scope.SCOPE_3, + "Transportation", + gasoline_mixture, + ) + fertilizer_parameters = dict( + average_yield=ghg.total_yield / area_ha, + urea_amount=ghg.urea_amount if ghg.urea_amount else 0, + gypsum_amount=ghg.gypsum_amount if ghg.gypsum_amount else 0, + limestone_calcite_amount=ghg.limestone_calcite_amount, + limestone_dolomite_amount=ghg.limestone_dolomite_amount, + synthetic_fertilizer_amount=ghg.synthetic_fertilizer_amount, + synthetic_fertilizer_nitrogen_ratio=ghg.synthetic_fertilizer_nitrogen_ratio, + manure_amount=ghg.manure_amount, + manure_birds_amount=ghg.manure_birds_amount, + organic_compound_amount=ghg.organic_compound_amount, + organic_other_amount=ghg.organic_other_amount, + green_manure_amount=ghg.green_manure_amount, + green_manure_grass_amount=ghg.green_manure_grass_amount, + green_manure_legumes_amount=ghg.green_manure_legumes_amount, + soil_management_area=ghg.soil_management_area + if ghg.soil_management_area + else area_ha, + ) + fertilizer_parameters = { + k: v if v is not None else 0.0 for k, v in fertilizer_parameters.items() + } + + fertilizer_emissions = crop_emission.fertilizer_emissions(**fertilizer_parameters) + initial_carbon_stock = crop_emission.initial_carbon_stock( + ghg.biome, ghg.previous_land_use + ) + biomass_burning_emissions = crop_emission.biomass_burning_emissions( + average_yield=ghg.total_yield / area_ha, + burn_area=ghg.burn_area if ghg.burn_area else 0.0, + ) + carbon_capture = crop_emission.carbon_capture( + area_ha, + ghg.green_manure_amount if ghg.green_manure_amount else 0.0, + ghg.green_manure_grass_amount if ghg.green_manure_grass_amount else 0.0, + ghg.green_manure_legumes_amount if ghg.green_manure_legumes_amount else 0.0, + ) + land_use_emissions = crop_emission.land_use_emissions( + ghg.biome, + ghg.previous_land_use, + area_ha, + 
ghg.current_land_use, + ghg.soil_clay_content if ghg.soil_clay_content else 0.0, + ) + + emissions = ( + [internal_operations_emissions] + + [e for e in fertilizer_emissions.values()] + + [initial_carbon_stock] + + [transport_emissions] + + [biomass_burning_emissions] + + [carbon_capture] + + [land_use_emissions] + ) + return { + "fluxes": [ + GHGFlux( + id=gen_hash_id( + f"ghg_{e.scope}_{e.source}_{asdict(ghg)}", + ghg.geometry, + ghg.time_range, + ), + time_range=ghg.time_range, + geometry=ghg.geometry, + scope=str(e.scope.value), + value=e.total, + description=e.source, + assets=[], + ) + for e in emissions + ] + } + + return emissions_callback diff --git a/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml b/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml new file mode 100644 index 00000000..11954e47 --- /dev/null +++ b/ops/compute_ghg_fluxes/compute_ghg_fluxes.yaml @@ -0,0 +1,15 @@ +name: compute_ghg_fluxes +inputs: + ghg: GHGProtocolVibe +output: + fluxes: List[GHGFlux] +parameters: + crop_type: "" +entrypoint: + file: compute_ghg_fluxes.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - crop_type +description: + short_description: Computes Green House Gas emission fluxes based on emission factors based on IPCC methodology. 
diff --git a/ops/compute_ghg_fluxes/test_ghg_fluxes.py b/ops/compute_ghg_fluxes/test_ghg_fluxes.py new file mode 100644 index 00000000..4c7bee80 --- /dev/null +++ b/ops/compute_ghg_fluxes/test_ghg_fluxes.py @@ -0,0 +1,106 @@ +import os +from datetime import datetime +from math import isclose +from typing import Dict, List, cast + +import pytest +from shapely import geometry as shpg + +from vibe_core.data import GHGFlux, GHGProtocolVibe +from vibe_dev.testing.op_tester import OpTester + +YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_ghg_fluxes.yaml") + + +@pytest.fixture +def fake_ghg() -> GHGProtocolVibe: + return GHGProtocolVibe( + id="fake_id", + time_range=(datetime(2020, 1, 1), datetime(2021, 1, 1)), + geometry=shpg.mapping(shpg.box(-43.793839, -20.668953, -43.784183, -20.657266)), + assets=[], + cultivation_area=10, + total_yield=50, # average = 5 + soil_texture_class="sand", + soil_clay_content=0.1, + previous_land_use="native", + current_land_use="conventional_crops", + practice_adoption_period=10, + burn_area=4, + soil_management_area=2, + synthetic_fertilizer_amount=100, + synthetic_fertilizer_nitrogen_ratio=10 / 100.0, + urea_amount=3, + limestone_calcite_amount=11, + limestone_dolomite_amount=22, + gypsum_amount=33, + organic_compound_amount=44, + manure_amount=55, + manure_birds_amount=66, + organic_other_amount=77, + diesel_amount=10, + gasoline_amount=666, + ethanol_amount=42, + biome="BRAZIL_AMAZON_SAVANNA", + transport_diesel_type="DIESEL_B10", + transport_diesel_amount=790, + green_manure_amount=22, + green_manure_grass_amount=33, + green_manure_legumes_amount=44, + ) + + +def test_ghg_fluxes(fake_ghg: GHGProtocolVibe): + op_tester = OpTester(YAML_PATH) + parameters = {"crop_type": "cotton"} + op_tester.update_parameters(parameters) + + output = cast(Dict[str, List[GHGFlux]], op_tester.run(ghg=fake_ghg)) + assert output + + fluxes = {e.description: e.value for e in output["fluxes"]} + + assert 
isclose(fluxes["Fertilizer emissions, urea"], 0.06, abs_tol=0.01), fluxes[ + "Fertilizer emissions, urea" + ] + + gypsum = [v for k, v in fluxes.items() if ", gypsum" in k][0] # type: ignore + assert isclose(gypsum, 0.29, abs_tol=0.01), gypsum + + assert isclose( + fluxes["Fertilizer emissions, synthetic nitrogen fertilizer"], 0.34, abs_tol=0.01 + ), fluxes["Fertilizer emissions, synthetic nitrogen fertilizer"] + + s = "Fertilizer emissions, manure" + f = [v for k, v in fluxes.items() if s in k][0] # type: ignore + assert isclose(f, 0.18, abs_tol=0.01), (s, f) + + flow = [v for k, v in fluxes.items() if "Flow emissions" in k][0] # type: ignore + assert isclose(flow, 0.17, abs_tol=0.001), flow + + atmospheric = [v for k, v in fluxes.items() if "Atmospheric emissions" in k][0] # type: ignore + assert isclose(atmospheric, 0.098, abs_tol=0.001), atmospheric + + residue = [v for k, v in fluxes.items() if "Residue decomposition" in k][0] # type: ignore + assert isclose(residue, 5.4672, abs_tol=0.001), residue + + assert isclose(fluxes["Soil management"], 146.67, abs_tol=0.1), fluxes["Soil management"] + + s = "Internal operations" + internal = [v for k, v in fluxes.items() if s in k][0] # type: ignore + assert isclose(internal, 1.3027, abs_tol=0.001), (s, internal) + + s = "Initial carbon stock" + assert isclose(fluxes[s], 863.76, abs_tol=1), fluxes[s] + + s = "Transportation / DIESEL_B10 / Biodiesel" + assert isclose(fluxes[s], 2.1131, abs_tol=0.01), fluxes[s] + + s = "Biomass Burning (Cotton)" + assert isclose(fluxes[s], 81.58, abs_tol=0.1), fluxes[s] + + s = "Carbon captured by Green Manure" + assert isclose(fluxes[s], -18.35, abs_tol=0.1), fluxes[s] + + s = "Land use change" + assert isclose(fluxes[s], 9.167, abs_tol=0.1), fluxes[s] diff --git a/ops/compute_illuminance/compute_illuminance.py b/ops/compute_illuminance/compute_illuminance.py new file mode 100644 index 00000000..90b14eb0 --- /dev/null +++ b/ops/compute_illuminance/compute_illuminance.py @@ -0,0 +1,45 @@ 
+# pyright: reportUnknownMemberType=false +from typing import Dict, List, Union, cast + +import rasterio + +from vibe_core.data import RasterIlluminance, Sentinel2CloudMask, Sentinel2Raster +from vibe_lib.spaceeye.illumination import MIN_CLEAR_RATIO, masked_average_illuminance +from vibe_lib.spaceeye.utils import QUANTIFICATION_VALUE + + +def compute_illuminance(item: Sentinel2Raster, cloud_mask: Sentinel2CloudMask): + """ + Compute illuminance values one band at a time to save memory + """ + data_filepath = item.raster_asset.url + mask_filepath = cloud_mask.raster_asset.url + illuminance: List[float] = [] + with rasterio.open(mask_filepath) as src: + mask = src.read(1).astype(bool) + if mask.mean() < MIN_CLEAR_RATIO: + return None + with rasterio.open(data_filepath) as src: + # rasterio indexes bands starting with 1 + for i in range(1, cast(int, src.count + 1)): + x = src.read(i) / QUANTIFICATION_VALUE + illuminance.append(float(masked_average_illuminance(x, mask))) + + return RasterIlluminance.clone_from(item, id=item.id, assets=[], illuminance=illuminance) + + +class CallbackBuilder: + def __init__(self, num_workers: int): + self.num_workers = num_workers + + def __call__(self): + def callback( + rasters: List[Sentinel2Raster], cloud_masks: List[Sentinel2CloudMask] + ) -> Dict[str, List[RasterIlluminance]]: + results = [compute_illuminance(item, mask) for item, mask in zip(rasters, cloud_masks)] + results = cast(List[Union[RasterIlluminance, None]], results) + results = [r for r in results if r is not None] + + return {"illuminance": results} + + return callback diff --git a/ops/compute_illuminance/compute_illuminance.yaml b/ops/compute_illuminance/compute_illuminance.yaml new file mode 100644 index 00000000..de302175 --- /dev/null +++ b/ops/compute_illuminance/compute_illuminance.yaml @@ -0,0 +1,11 @@ +name: compute_illuminance +inputs: + rasters: List[Sentinel2Raster] + cloud_masks: List[Sentinel2CloudMask] +output: + illuminance: List[RasterIlluminance] 
+parameters: + num_workers: 6 +entrypoint: + file: compute_illuminance.py + callback_builder: CallbackBuilder diff --git a/ops/compute_index/compute_index.yaml b/ops/compute_index/compute_index.yaml new file mode 100644 index 00000000..54a822a2 --- /dev/null +++ b/ops/compute_index/compute_index.yaml @@ -0,0 +1,15 @@ +name: compute_index +inputs: + raster: Raster +output: + index: Raster +parameters: + index: ndvi +entrypoint: + file: index.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - index +description: + short_description: Computes `index` over the input raster. \ No newline at end of file diff --git a/ops/compute_index/index.py b/ops/compute_index/index.py new file mode 100644 index 00000000..f6eefda4 --- /dev/null +++ b/ops/compute_index/index.py @@ -0,0 +1,213 @@ +from collections import defaultdict +from tempfile import TemporaryDirectory +from typing import Any, Callable, Dict, List, cast + +import numpy as np +import spyndex +import xarray as xr +from scipy.ndimage import gaussian_filter +from sklearn.neighbors import NearestNeighbors + +from vibe_core.data import Raster +from vibe_lib.raster import ( + RGBA, + compute_index, + get_cmap, + interpolated_cmap_from_colors, + json_to_asset, + load_raster, + save_raster_from_ref, +) + +NDVI_CMAP_INTERVALS: List[float] = [ + -1.0, + -0.2, + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0, +] +NDVI_CMAP_COLORS: List[RGBA] = [ + RGBA(0, 0, 255, 255), + RGBA(0, 0, 38, 255), + RGBA(166, 0, 38, 255), + RGBA(214, 48, 38, 255), + RGBA(242, 110, 66, 255), + RGBA(252, 173, 97, 255), + RGBA(252, 224, 140, 255), + RGBA(255, 255, 191, 255), + RGBA(217, 240, 140, 255), + RGBA(166, 217, 107, 255), + RGBA(102, 189, 99, 255), + RGBA(26, 153, 79, 255), + RGBA(0, 102, 54, 255), +] + + +def compute_ndre(bands: xr.DataArray) -> xr.DataArray: + """ + Normalized difference red edge index + """ + re, nir = bands + ndre: xr.DataArray = (nir - re) / (nir + re) + 
ndre.rio.write_nodata(100, encoded=True, inplace=True) + return ndre + + +def compute_pri(bands: xr.DataArray) -> xr.DataArray: + """ + Photochemical reflectance index + """ + re, nir = bands + pri: xr.DataArray = (re) / (nir + re) + pri.rio.write_nodata(100, encoded=True, inplace=True) + return pri + + +def compute_reci(bands: xr.DataArray) -> xr.DataArray: + """ + Red-Edge Chlorophyll Vegetation Index + """ + re, nir = bands + reci: xr.DataArray = (nir / re) - 1 + reci.rio.write_nodata(100, encoded=True, inplace=True) + return reci + + +def compute_methane(bands: xr.DataArray, neighbors: int = 6, sigma: float = 1.8) -> xr.DataArray: + b12 = bands[-1].to_masked_array() + m = b12.mask + b12 = b12.filled(b12.mean()) + other_bands = bands[:-1].to_masked_array() + m = m | other_bands.mask.any(axis=0) + other_bands = other_bands.filled(other_bands.mean()) + b12 = gaussian_filter(b12, sigma).squeeze() + b12_f = b12.flatten() + other_bands = gaussian_filter(other_bands, sigma) + x = other_bands.reshape(other_bands.shape[0], -1).T + nn = NearestNeighbors(n_neighbors=neighbors).fit(x) + ref_b12_values = np.median( + b12_f[nn.kneighbors(x, return_distance=False)], # type: ignore + axis=1, + ).reshape(b12.shape) + index = (b12 - ref_b12_values) / ref_b12_values + methane_xr = bands[0].astype(np.float32).copy(data=np.ma.masked_array(index, mask=m)) + return methane_xr + + +def default_vis(): + return { + "colormap": interpolated_cmap_from_colors(NDVI_CMAP_COLORS, NDVI_CMAP_INTERVALS), + "range": (-1, 1), + } + + +class CallbackBuilder: + custom_indices: Dict[str, Callable[..., xr.DataArray]] = { + "methane": compute_methane, + "ndre": compute_ndre, + "pri": compute_pri, + "reci": compute_reci, + } + custom_index_bands: Dict[str, List[str]] = { + "methane": ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B12"], + "ndre": ["RE1", "N"], + "pri": ["R", "N"], + "reci": ["RE1", "N"], + } + index_vis: Dict[str, Dict[str, Any]] = defaultdict( + default_vis, 
{"methane": {"colormap": get_cmap("gray"), "range": (-0.2, 0.2)}} + ) + + def __init__(self, index: str): + # the indices ndvi, evi, msevi and ndmi are now computed with spyndex + if ( + index not in spyndex.indices + and index.upper() not in spyndex.indices + and index not in self.custom_indices + ): + raise ValueError( + f"Operation compute_index called with unknown index {index}. " + f"Available indices are {list(spyndex.indices) + list(self.custom_indices.keys())}." + ) + self.tmp_dir = TemporaryDirectory() + if index in self.custom_indices.keys(): + self.name = index + self.index_fn = self.custom_indices[index] + else: + self.name = {i.upper(): i for i in spyndex.indices}[index.upper()] + + def check_raster_bands(self, raster: Raster, bands: List[str]) -> None: + if not set(bands).issubset(set(raster.bands)): + raise ValueError( + f"Raster does not contain bands {bands} needed to compute index {self.name}. " + f"Bands in input raster are: {', '.join(raster.bands.keys())}." + ) + + def check_constants(self, constants: Dict[str, Any]) -> None: + unsupported_constants = [] + for k, v in constants.items(): + if v is None or not isinstance(v, (int, float)): + unsupported_constants.append(k) + + if unsupported_constants: + raise ValueError( + f"Index {self.name} still not supported. " + "Spyndex does not define a default int or float value " + f"for constants {unsupported_constants}." 
+ ) + + def __call__(self): + def index_callback(raster: Raster) -> Dict[str, Raster]: + output_dir = self.tmp_dir.name + + # compute index using spyndex + if self.name in spyndex.indices: + bands_spyndex = list(set(spyndex.indices[self.name].bands) - set(spyndex.constants)) + # TODO allow user to use different values for the constants + const_spyndex = { + i: spyndex.constants[i].default + for i in set(spyndex.indices[self.name].bands).intersection( + set(spyndex.constants) + ) + } + self.check_constants(const_spyndex) + self.check_raster_bands(raster, bands_spyndex) + raster_da = load_raster( + raster, bands=cast(List[str], bands_spyndex), use_geometry=True + ) + # Convert to reflectance values, add minimum value to avoid division by zero + raster_da = (raster_da.astype(np.float32) * raster.scale + raster.offset).clip( + min=1e-6 + ) + params = {j: raster_da[i] for i, j in enumerate(bands_spyndex)} + params.update(const_spyndex) + idx = spyndex.computeIndex(index=self.name, params=params) + index_raster = save_raster_from_ref(idx, output_dir, raster) + index_raster.bands = {self.name: 0} + else: + self.check_raster_bands(raster, self.custom_index_bands[self.name]) + index_raster = compute_index( + raster, + self.custom_index_bands[self.name], + self.index_fn, + self.name, + output_dir, + ) + + vis_dict = {"bands": [0], **self.index_vis[self.name]} + index_raster.assets.append(json_to_asset(vis_dict, output_dir)) + + return {"index": index_raster} + + return index_callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_index/test_index.py b/ops/compute_index/test_index.py new file mode 100644 index 00000000..114daa98 --- /dev/null +++ b/ops/compute_index/test_index.py @@ -0,0 +1,156 @@ +import os +from datetime import datetime +from tempfile import TemporaryDirectory +from typing import List, Tuple, cast + +import numpy as np +import pytest +import rioxarray as rio +import spyndex +import xarray as xr +from index import compute_methane, 
compute_ndre, compute_reci +from shapely import geometry as shpg + +from vibe_core.data import Raster +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.raster import save_raster_to_asset + +YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_index.yaml") + + +# code originally on index.py. now we are using spyndex +def compute_ndvi(bands: xr.DataArray) -> xr.DataArray: + red, nir = bands + ndvi: xr.DataArray = (nir - red) / (nir + red) + ndvi.rio.write_nodata(100, encoded=True, inplace=True) + return ndvi + + +# code originally on index.py. now we are using spyndex +def compute_evi(bands: xr.DataArray) -> xr.DataArray: + blue, red, nir = bands + evi: xr.DataArray = 2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1) + evi.rio.write_nodata(100, encoded=True, inplace=True) + return evi + + +# code originally on index.py. now we are using spyndex +def compute_msavi(bands: xr.DataArray) -> xr.DataArray: + """ + Modified Soil Adjusted Vegetation Index. + This is technically MSAVI_2 which is frequently used as MSAVI + """ + red, nir = bands + disc = (2 * nir + 1) ** 2 - 8 * (nir - red) + msavi: xr.DataArray = (2 * nir + 1 - disc**0.5) / 2.0 + msavi.rio.write_nodata(100, encoded=True, inplace=True) + return msavi + + +# code originally on index.py. 
now we are using spyndex +def compute_ndmi(bands: xr.DataArray) -> xr.DataArray: + """ + Normalized Difference Moisture Index + """ + nir, swir16 = bands + ndmi: xr.DataArray = (nir - swir16) / (nir + swir16) + ndmi.rio.write_nodata(100, encoded=True, inplace=True) + return ndmi + + +def compute_ndwi(bands: xr.DataArray) -> xr.DataArray: + g, n = bands + return spyndex.indices.NDWI.compute(G=g, N=n) + + +def compute_lswi(bands: xr.DataArray) -> xr.DataArray: + n, s1 = bands + return spyndex.indices.LSWI.compute(N=n, S1=s1) + + +def compute_nbr(bands: xr.DataArray) -> xr.DataArray: + n, s2 = bands + return spyndex.indices.NBR.compute(N=n, S2=s2) + + +true_index_fn = { + "ndvi": compute_ndvi, + "evi": compute_evi, + "msavi": compute_msavi, + "ndmi": compute_ndmi, + "ndwi": compute_ndwi, + "methane": compute_methane, + "ndre": compute_ndre, + "reci": compute_reci, + "LSWI": compute_lswi, + "NBR": compute_nbr, +} + + +def create_fake_raster( + tmp_dir_name: str, bands: List[str], y: int, x: int +) -> Tuple[Raster, xr.DataArray]: + nbands = len(bands) + fake_data = np.random.random((nbands, y, x)).astype(np.float32) + fake_da = xr.DataArray( + fake_data, + coords={"bands": np.arange(nbands), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, + dims=["bands", "y", "x"], + ) + fake_da.rio.write_crs("epsg:4326", inplace=True) + + asset = save_raster_to_asset(fake_da, tmp_dir_name) + + return ( + Raster( + id="fake_id", + time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)), + geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), + assets=[asset], + bands={j: i for i, j in enumerate(bands)}, + ), + fake_da, + ) + + +@pytest.fixture +def tmp_dir(): + _tmp_dir = TemporaryDirectory() + yield _tmp_dir.name + _tmp_dir.cleanup() + + +@pytest.mark.parametrize( + "bands, index, should_fail", + [ + (["R", "N"], "ndvi", False), + (["B", "R", "N"], "evi", False), + (["R", "N"], "msavi", False), + (["N", "S1"], "ndmi", False), + (["RE1", "N"], "ndre", False), + (["RE1", "N"],
"reci", False), + ( + ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B12"], + "methane", + False, + ), + (["G", "N"], "ndwi", False), + (["N"], "LSWI", True), + (["N", "S1"], "LSWI", False), + (["N", "S2"], "NBR", False), + ], +) +def test_op(bands: List[str], index: str, should_fail: bool, tmp_dir: str): + raster, da = create_fake_raster(tmp_dir, bands, 20, 20) + op_tester = OpTester(YAML_PATH) + parameters = {"index": index} + op_tester.update_parameters(parameters) + if should_fail: + # A failing case must actually raise; an op that silently succeeds is a regression + with pytest.raises(ValueError): + op_tester.run(raster=raster) + return + output = cast(Raster, op_tester.run(raster=raster)["index"]) + output_array = rio.open_rasterio(output.raster_asset.path_or_url).values # type: ignore + true_array = true_index_fn[index](da).values + assert np.all(np.isclose(output_array, true_array)) # type: ignore diff --git a/ops/compute_irrigation_probability/compute_irrigation_probability.py b/ops/compute_irrigation_probability/compute_irrigation_probability.py new file mode 100644 index 00000000..b13fb3e2 --- /dev/null +++ b/ops/compute_irrigation_probability/compute_irrigation_probability.py @@ -0,0 +1,113 @@ +import os +from tempfile import TemporaryDirectory +from typing import Dict + +import numpy as np +import xarray as xr +from sklearn.linear_model import LogisticRegression +from sklearn.preprocessing import StandardScaler + +from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid +from vibe_lib.raster import load_raster_match + + +# Define a function for ngi, egi, and lst data treatment +def preprocess_raster_values(raster: xr.DataArray): + # Flatten to 1-D; ravel() may return a view, so the data is never modified in place + raster_values = raster.values.ravel() + + # NaN, infinities and the -9999 placeholder all count as invalid + invalid = ~np.isfinite(raster_values) | (raster_values == -9999) + + # Replace invalid entries with 0 (np.where allocates a new array) + raster_values = np.where(invalid, 0, raster_values) + + return raster_values + + +class CallbackBuilder: + def __init__(self,
coef_ngi: float, coef_egi: float, coef_lst: float, intercept: float): + # Create temporary directory to store our new data, which will be transferred to our storage + # automatically when the op is run in a workflow + self.tmp_dir = TemporaryDirectory() + + # Set Parameters + self.coef_ngi = coef_ngi + self.coef_egi = coef_egi + self.coef_lst = coef_lst + self.intercept = intercept + + def __call__(self): + def callback( + landsat_raster: LandsatRaster, + ngi: Raster, + egi: Raster, + lst: Raster, + cloud_water_mask_raster: Raster, + ) -> Dict[str, Raster]: + # Get cloud water mask layer + cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0] + + # Get ngi, egi, and lst layers + ngi1 = load_raster_match(ngi, landsat_raster)[0] + egi1 = load_raster_match(egi, landsat_raster)[0] + lst1 = load_raster_match(lst, landsat_raster)[0] + + ngi_values = preprocess_raster_values(ngi1) + egi_values = preprocess_raster_values(egi1) + lst_values = preprocess_raster_values(lst1) + + # Stack the flattened layers as columns: one row per pixel, columns (ngi, egi, lst) + x = np.stack((ngi_values, egi_values, lst_values), axis=1) + x = x.astype(float) + + # Apply scaler + scaler = StandardScaler() + x_scaled = scaler.fit_transform(x) + + # Create a logistic regression model + model = LogisticRegression() + + # Set the coefficients (same order as the columns of x) and intercept + coef_ = np.array([[self.coef_ngi, self.coef_egi, self.coef_lst]]) + intercept_ = [self.intercept] + classes_ = np.array(["1", "2"]) + + # Assign the coefficients and intercept to the model + model.coef_ = coef_ + model.intercept_ = intercept_ + model.classes_ = classes_ + + # Make predictions using the model + predicted_labels = model.predict_proba(x_scaled)[:, 0] + + # Assign shape + predicted_labels = predicted_labels.reshape(cloud_water_mask.shape) + + # Treat the result with cloud water mask + predicted_labels = predicted_labels * cloud_water_mask + + # Wrap predicted_labels in a DataArray with the cloud water mask dims and coords + predicted_labels_xr = xr.DataArray( +
predicted_labels, + dims=cloud_water_mask.dims, + coords=cloud_water_mask.coords, + ) + + # Save the DataArray to a raster file + filepath = os.path.join(self.tmp_dir.name, "irrigation_probability.tif") + predicted_labels_xr.rio.to_raster(filepath) + irr_prob_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) + return { + "irrigation_probability": Raster.clone_from( + landsat_raster, + id=gen_guid(), + assets=[irr_prob_asset], + bands={"irrigation_probability": 0}, + ) + } + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_irrigation_probability/compute_irrigation_probability.yaml b/ops/compute_irrigation_probability/compute_irrigation_probability.yaml new file mode 100644 index 00000000..6857f14b --- /dev/null +++ b/ops/compute_irrigation_probability/compute_irrigation_probability.yaml @@ -0,0 +1,26 @@ +name: compute_irrigation_probability +inputs: + landsat_raster: LandsatRaster + cloud_water_mask_raster: Raster + ngi: Raster + egi: Raster + lst: Raster +output: + irrigation_probability: Raster +parameters: + coef_ngi: -0.50604148 + coef_egi: -0.93103156 + coef_lst: -0.14612046 + intercept: 1.99036986 +entrypoint: + file: compute_irrigation_probability.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - coef_ngi + - coef_egi + - coef_lst + - intercept +description: + short_description: Computes irrigation probability values for each pixel in raster using optimized logistic regression + model with ngi, egi, and lst rasters as input diff --git a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py new file mode 100644 index 00000000..c2a529d6 --- /dev/null +++ b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py @@ -0,0 +1,102 @@ +import os +from tempfile import TemporaryDirectory +from typing import Dict + +import rioxarray as rio + +from vibe_core.data import AssetVibe, LandsatRaster, Raster, gen_guid +from vibe_lib.raster 
import load_raster, load_raster_match + +# Scale and Offset Constants of LST and Rest of the Landsat Bands +SCALE_LST = 0.00341802 +OFFSET_LST = 149 +SCALE_BAND = 0.0000275 +OFFSET_BAND = 0.2 + + +class CallbackBuilder: + def __init__(self): + # Create temporary directory to store our new data, which will be transferred to our storage + # automatically when the op is run in a workflow + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def callback( + landsat_raster: LandsatRaster, + ndvi_raster: Raster, + evaporative_fraction: Raster, + cloud_water_mask_raster: Raster, + ) -> Dict[str, Raster]: + # LAYERS PREPARATION + # lst comes from load_raster, while green and nir below are indexed directly + # out of the opened raster using landsat_raster.bands; each band is then + # rescaled from its stored values with the constants defined above. + + # Apply the LST scale and offset to the lwir11 band + lst = load_raster(landsat_raster, bands=["lwir11"])[0] + lst = (lst * SCALE_LST) + OFFSET_LST + + # Apply the generic band scale and offset to the green band + green = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ + landsat_raster.bands["green"] + ] + green = (green * SCALE_BAND) - OFFSET_BAND + + # Apply the generic band scale and offset to the nir band + nir = rio.open_rasterio(landsat_raster.raster_asset.path_or_url)[ + landsat_raster.bands["nir"] + ] + nir = (nir * SCALE_BAND) - OFFSET_BAND + + # Get ndvi index + ndvi = load_raster_match(ndvi_raster, landsat_raster)[0] + + # Get evaporative fraction raster + evap_fraxn = load_raster_match(evaporative_fraction, landsat_raster)[0] + + # Get cloud water mask raster + cloud_water_mask = load_raster_match(cloud_water_mask_raster, landsat_raster)[0] + + # Calculate Green Index + gi = nir / green + + # Calculate ngi layer from Green Index and ndvi index + ngi = ndvi * gi + + # Calculate egi layer from Green Index and evaporative fraction layer + egi = evap_fraxn / gi + + # Apply cloud water mask to ngi, egi, and lst layers + ngi = ngi * cloud_water_mask + egi = egi * cloud_water_mask + lst = lst * cloud_water_mask + + # Save the
DataArray to a raster file + filepath = os.path.join(self.tmp_dir.name, "ngi.tif") + ngi.rio.to_raster(filepath) + ngi_asset = AssetVibe(reference=filepath, type="image/tiff", id=gen_guid()) + + filepath1 = os.path.join(self.tmp_dir.name, "egi.tif") + egi.rio.to_raster(filepath1) + egi_asset = AssetVibe(reference=filepath1, type="image/tiff", id=gen_guid()) + + filepath2 = os.path.join(self.tmp_dir.name, "lst.tif") + lst.rio.to_raster(filepath2) + lst_asset = AssetVibe(reference=filepath2, type="image/tiff", id=gen_guid()) + + return { + "ngi": Raster.clone_from( + landsat_raster, id=gen_guid(), assets=[ngi_asset], bands={"ngi": 0} + ), + "egi": Raster.clone_from( + landsat_raster, id=gen_guid(), assets=[egi_asset], bands={"egi": 0} + ), + "lst": Raster.clone_from( + landsat_raster, id=gen_guid(), assets=[lst_asset], bands={"lst": 0} + ), + } + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml new file mode 100644 index 00000000..40b6631f --- /dev/null +++ b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.yaml @@ -0,0 +1,17 @@ +name: compute_ngi_egi_layers +inputs: + landsat_raster: LandsatRaster + ndvi_raster: Raster + cloud_water_mask_raster: Raster + evaporative_fraction: Raster +output: + ngi: Raster + egi: Raster + lst: Raster +entrypoint: + file: compute_ngi_egi_layers.py + callback_builder: CallbackBuilder +dependencies: +parameters: +description: + short_description: Computes NGI, EGI, and LST layers from landsat bands, ndvi layer, cloud water mask layer and evaporative fraction layer diff --git a/ops/compute_onnx/compute_onnx.py b/ops/compute_onnx/compute_onnx.py new file mode 100644 index 00000000..e4294a47 --- /dev/null +++ b/ops/compute_onnx/compute_onnx.py @@ -0,0 +1,105 @@ +import os +from tempfile import TemporaryDirectory +from typing import Dict, List, Optional, Union + +import onnxruntime as ort +from 
rasterio.enums import Resampling + +from vibe_core.data import AssetVibe, Raster, gen_guid +from vibe_core.data.rasters import RasterChunk, RasterSequence +from vibe_lib.raster import resample_raster +from vibe_lib.spaceeye.chip import Dims, StackOnChannelsChipDataset, get_loader, predict_chips + +ROOT_DIR = "/mnt/onnx_resources/" + + +class CallbackBuilder: + def __init__( + self, + model_file: str, + window_size: int, + overlap: float, + batch_size: int, + num_workers: int, + nodata: Union[float, int], + skip_nodata: bool, + resampling: str = "bilinear", + root_dir: str = ROOT_DIR, + downsampling: int = 1, + ): + self.tmp_dir = TemporaryDirectory() + self.downsampling = downsampling + if model_file is None or not os.path.exists(os.path.join(root_dir, model_file)): + raise ValueError(f"Model file '{model_file}' does not exist.") + self.root_dir = root_dir + self.model_file = model_file + self.window_size = window_size + self.overlap = overlap + self.batch_size = batch_size + self.num_workers = num_workers + self.nodata = nodata + self.skip_nodata = skip_nodata + self.resampling = Resampling[resampling] + + def __call__(self): + def compute_onnx( + input_raster: Union[Raster, RasterSequence, List[Raster]], + chunk: Optional[RasterChunk] = None, + ) -> Dict[str, Union[Raster, RasterChunk]]: + if self.downsampling < 1: + raise ValueError( + f"Downsampling must be equal or larger than 1, found {self.downsampling}" + ) + + if isinstance(input_raster, RasterSequence): + input = [ + Raster.clone_from(input_raster, gen_guid(), assets=[i]) + for i in input_raster.get_ordered_assets() + ] + elif isinstance(input_raster, list): + input = input_raster + else: + input = [input_raster] + + model_path = os.path.join(self.root_dir, self.model_file) + model = ort.InferenceSession(model_path) + chip_size = self.window_size + step_size = int(chip_size * (1 - self.overlap)) + dataset = StackOnChannelsChipDataset( + [[i] for i in input], + chip_size=Dims(chip_size, chip_size, 1), + 
step_size=Dims(step_size, step_size, 1), + downsampling=self.downsampling, + nodata=self.nodata, + geometry_or_chunk=chunk, + ) + + dataloader = get_loader(dataset, self.batch_size, self.num_workers) + pred_filepaths = predict_chips( + model, dataloader, self.tmp_dir.name, skip_nodata=self.skip_nodata + ) + assert ( + len(pred_filepaths) == 1 + ), f"Expected one prediction file, found: {len(pred_filepaths)}" + pred_filepath = resample_raster( + pred_filepaths[0], + self.tmp_dir.name, + dataset.width, + dataset.height, + dataset.transform, + self.resampling, + ) + asset = AssetVibe(reference=pred_filepath, type="image/tiff", id=gen_guid()) + if chunk is None: + res = Raster.clone_from(input[0], id=gen_guid(), assets=[asset]) + else: + res = RasterChunk.clone_from( + chunk, id=gen_guid(), geometry=chunk.geometry, assets=[asset] + ) + + return {"output_raster": res} + + return compute_onnx + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_onnx/compute_onnx.yaml b/ops/compute_onnx/compute_onnx.yaml new file mode 100644 index 00000000..fe5a792f --- /dev/null +++ b/ops/compute_onnx/compute_onnx.yaml @@ -0,0 +1,26 @@ +name: compute_onnx +inputs: + input_raster: Raster +output: + output_raster: Raster +parameters: + downsampling: 1 + model_file: + window_size: 512 + overlap: .25 + batch_size: 1 + num_workers: 0 + nodata: 100 + skip_nodata: true + resampling: bilinear +entrypoint: + file: compute_onnx.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - model_file + - downsampling + - window_size + - overlap + - resampling + - skip_nodata diff --git a/ops/compute_onnx/compute_onnx_from_chunks.yaml b/ops/compute_onnx/compute_onnx_from_chunks.yaml new file mode 100644 index 00000000..db19a2d7 --- /dev/null +++ b/ops/compute_onnx/compute_onnx_from_chunks.yaml @@ -0,0 +1,25 @@ +name: compute_onnx_from_chunks +inputs: + input_raster: RasterSequence + chunk: RasterChunk +output: + output_raster: RasterChunk +parameters: + model_file: + 
window_size: 512 + overlap: .0 + batch_size: 1 + num_workers: 0 + nodata: 100 + skip_nodata: True +entrypoint: + file: compute_onnx.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - model_file + - window_size + - overlap + - skip_nodata +description: + short_description: Runs the onnx model across chunks of the input rasters. \ No newline at end of file diff --git a/ops/compute_onnx/compute_onnx_from_sequence.yaml b/ops/compute_onnx/compute_onnx_from_sequence.yaml new file mode 100644 index 00000000..fd4cd194 --- /dev/null +++ b/ops/compute_onnx/compute_onnx_from_sequence.yaml @@ -0,0 +1,28 @@ +name: compute_onnx_from_sequence +inputs: + input_raster: RasterSequence +output: + output_raster: Raster +parameters: + downsampling: 1 + model_file: + window_size: 512 + overlap: .0 + batch_size: 1 + num_workers: 0 + nodata: 100 + skip_nodata: True + resampling: bilinear +entrypoint: + file: compute_onnx.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - model_file + - downsampling + - window_size + - overlap + - resampling + - skip_nodata +description: + short_description: Processes a sequence of rasters with an ONNX model. 
\ No newline at end of file diff --git a/ops/compute_onnx/test_compute_onnx.py b/ops/compute_onnx/test_compute_onnx.py new file mode 100644 index 00000000..eeb173b9 --- /dev/null +++ b/ops/compute_onnx/test_compute_onnx.py @@ -0,0 +1,154 @@ +import mimetypes +import os +from datetime import datetime, timedelta +from tempfile import TemporaryDirectory +from typing import List, Tuple, Union, cast + +import numpy as np +import pytest +import rioxarray +import torch +import xarray as xr +from numpy.typing import NDArray +from shapely import geometry as shpg +from torch import nn +from torch.nn.parameter import Parameter + +from vibe_core.data import AssetVibe, Raster +from vibe_core.data.core_types import gen_guid +from vibe_core.data.rasters import RasterSequence +from vibe_dev.testing.op_tester import OpTester + +YAML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_onnx.yaml") +YAML_FLIST_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "compute_onnx_from_sequence.yaml" +) +PY_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "compute_onnx.py") + + +class IdentityNetwork(nn.Module): + def __init__(self, channels: int): + super(IdentityNetwork, self).__init__() + self.c1 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=1, bias=False) + eye = np.eye(channels).reshape((channels, channels, 1, 1)).astype(np.float32) + self.c1.weight = Parameter(torch.from_numpy(eye)) + + def forward(self, x: torch.Tensor): + return self.c1(x) + + +class DummyCloud(nn.Module): + def __init__(self, channels: int, kernel_size: int = 3): + super(DummyCloud, self).__init__() + self.c1 = nn.Conv2d( + in_channels=channels, + out_channels=1, + kernel_size=kernel_size, + padding=(kernel_size - 1) // 2, + bias=False, + ) + w = np.ones((1, channels, kernel_size, kernel_size)).astype(np.float32) + self.c1.weight = Parameter(torch.from_numpy(w)) + self.p = nn.Sigmoid() + + def forward(self, x: torch.Tensor): + return 
self.p(self.c1(x)) + + +def create_onnx_model(nn: nn.Module, tmp_dir_name: str, channels: int) -> str: + dims = (1, channels, 3, 3) # any value for batch size, y, x should work here + data = np.random.random(dims).astype(np.float32) + + name = f"{nn.__class__.__name__}.onnx" + + torch.onnx.export( + nn, + torch.Tensor(data), + os.path.join(tmp_dir_name, name), + input_names=["in"], + output_names=["out"], + dynamic_axes={"in": {0: "batch", 2: "y", 3: "x"}, "out": {0: "batch", 2: "y", 3: "x"}}, + ) + + return name + + +def create_fake_raster( + tmp_dir_name: str, bands: int, y: int, x: int, delta: int = 0 +) -> Tuple[Raster, NDArray[np.float32]]: + fake_data = np.random.random((bands, y, x)).astype(np.float32) + fake_da = xr.DataArray( + fake_data, + coords={"bands": np.arange(bands), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, + dims=["bands", "y", "x"], + ) + path = os.path.join(tmp_dir_name, f"{gen_guid()}.tif") + fake_da.rio.to_raster(path) + + asset = AssetVibe( + reference=path, + type=mimetypes.types_map[".tif"], + id="fake_asset", + ) + + d = datetime(2022, 1, 1) + timedelta(days=delta) + + return ( + Raster( + id="fake_id", + time_range=(d, d), + geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), + assets=[asset], + bands={str(i): i for i in range(bands)}, + ), + fake_data, + ) + + +@pytest.fixture +def tmp_dir(): + _tmp_dir = TemporaryDirectory() + yield _tmp_dir.name + _tmp_dir.cleanup() + + +@pytest.mark.parametrize( + "bands, y, x", + [ + ([3, 2, 1], 512, 512), + ([2, 2, 2], 1024, 1024), + ([1], 514, 513), + (3, 512, 512), + (2, 1024, 1024), + ], +) +def test_op(bands: Union[int, List[int]], y: int, x: int, tmp_dir: str): + model_class_list = [IdentityNetwork, DummyCloud] + channels = np.sum(bands).astype(int) + + model_list = [m(channels) for m in model_class_list] + onnx_list = [create_onnx_model(m, tmp_dir, channels) for m in model_list] + if isinstance(bands, list): + yaml = YAML_FLIST_PATH + rasters = [] + arrays = [] + for i, n in 
enumerate(bands): + raster, array = create_fake_raster(tmp_dir, n, y, x, delta=i) + rasters.append(raster) + arrays.append(array) + raster = RasterSequence.clone_from(rasters[0], gen_guid(), []) + for r in rasters: + raster.add_item(r) + array = np.concatenate(arrays, axis=0) + else: + yaml = YAML_PATH + raster, array = create_fake_raster(tmp_dir, bands, y, x) + + op_tester = OpTester(yaml) + for model, onnx in zip(model_list, onnx_list): + parameters = {"root_dir": tmp_dir, "model_file": onnx, "overlap": 0.1} + op_tester.update_parameters(parameters) + output_data = cast(Raster, op_tester.run(input_raster=raster)["output_raster"]) + output_array = rioxarray.open_rasterio(output_data.raster_asset.path_or_url).values # type: ignore + true_array = model.forward(torch.from_numpy(array)).detach().numpy() + assert np.all(np.isclose(output_array, true_array)) # type: ignore diff --git a/ops/compute_onnx/test_compute_onnx_chunk.py b/ops/compute_onnx/test_compute_onnx_chunk.py new file mode 100644 index 00000000..8ec5922b --- /dev/null +++ b/ops/compute_onnx/test_compute_onnx_chunk.py @@ -0,0 +1,155 @@ +import os +from datetime import datetime, timedelta +from pathlib import Path +from typing import List, Tuple, cast + +import numpy as np +import pytest +import rioxarray +import torch +import xarray as xr +from numpy.typing import NDArray +from shapely import geometry as shpg +from torch import nn + +from vibe_core.data import DataVibe, Raster, RasterChunk, RasterSequence +from vibe_core.data.core_types import gen_guid +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.raster import save_raster_to_asset + +N_SAMPLES = 100 +STEP_Y = 3 +STEP_X = 3 +WINDOW_SIZE = 3 + +HERE = os.path.dirname(os.path.abspath(__file__)) +CHUNK_RASTER_YAML = os.path.join(HERE, "..", "chunk_raster", "chunk_raster.yaml") +LIST_TO_SEQ_YAML = os.path.join(HERE, "..", "list_to_sequence", "list_to_sequence.yaml") +COMPUTE_ONNX_YAML = os.path.join(HERE, "compute_onnx_from_chunks.yaml") 
+COMBINE_CHUNKS_YAML = os.path.join(HERE, "..", "combine_chunks", "combine_chunks.yaml") + + +class TestModel(nn.Module): + __test__ = False + + def __init__(self, n: int): + super(TestModel, self).__init__() + self.n = n + A = np.stack((np.arange(n), np.ones(n))).T + self.A = torch.from_numpy(A) + self.ATAinv = torch.from_numpy(np.linalg.inv(A.T @ A)) + + def forward(self, x: torch.Tensor): + x = torch.squeeze(x) + B = torch.reshape(x, (self.n, -1)) + ATB = torch.sum(self.A.reshape(self.n, 2, 1) * B.reshape(self.n, 1, -1), dim=0) + beta_hat = (self.ATAinv @ ATB)[0, :] + alpha = (self.ATAinv @ ATB)[1, :] + return torch.stack((beta_hat.reshape(x.shape[1:]), alpha.reshape(x.shape[1:])))[ + None, :, :, : + ] + + +def create_list_fake_raster( + tmp_dir_name: str, t: int, y: int, x: int +) -> Tuple[List[Raster], NDArray[np.float32]]: + def fake_cube(sx: int, sy: int, sz: int): + res = [] + for i in range(sy * sx): + h = i / (sy * sx - 1) + res.append(np.linspace(0, h * (sz - 1), sz)) + + res = np.stack(res) + return res.reshape((sy, sx, -1)).transpose((2, 0, 1)) + + sf = fake_cube(x, y, t) + res = [] + for i in range(t): + fake_da = xr.DataArray( + sf[i : i + 1, :, :], + coords={"bands": [0], "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, + dims=["bands", "y", "x"], + ) + asset = save_raster_to_asset(fake_da, tmp_dir_name) + + d = datetime(2022, 1, 1) + timedelta(days=i) + res.append( + Raster( + id=gen_guid(), + time_range=(d, d), + geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), + bands={"band": 0}, + assets=[asset], + ) + ) + + return res, sf + + +@pytest.mark.parametrize("y, x", [(6, 6), (3, 3), (6, 3), (3, 6), (8, 3), (8, 8), (10, 12)]) +def test_op(y: int, x: int, tmp_path: Path): + raster_list, input_model = create_list_fake_raster(str(tmp_path.absolute()), N_SAMPLES, y, x) + model = TestModel(N_SAMPLES) + model_path = os.path.join(str(tmp_path.absolute()), "model.onnx") + dummy = np.random.random((1, N_SAMPLES, STEP_Y, STEP_X)).astype(np.float32) 
+ torch.onnx.export( + model, + torch.from_numpy(dummy), + model_path, + input_names=["in"], + output_names=["out"], + dynamic_axes={"in": {0: "batch", 2: "y", 3: "x"}, "out": {0: "batch", 2: "y", 3: "x"}}, + ) + + chunk_raster_op = OpTester(CHUNK_RASTER_YAML) + chunk_raster_op.update_parameters({"step_y": STEP_Y, "step_x": STEP_X}) + chunked_rasters = cast( + List[RasterChunk], + # pyright misidentifies types here + chunk_raster_op.run(rasters=cast(List[DataVibe], raster_list))[ # type: ignore + "chunk_series" + ], + ) + + list_to_raster_op = OpTester(LIST_TO_SEQ_YAML) + raster_seq = cast( + RasterSequence, + # pyright misidentifies types here + list_to_raster_op.run(list_rasters=cast(List[DataVibe], raster_list))[ # type: ignore + "rasters_seq" + ], + ) + + out_chunks = [] + ops = [] + for chunk in chunked_rasters: + compute_onnx_op = OpTester(COMPUTE_ONNX_YAML) + compute_onnx_op.update_parameters( + { + "root_dir": HERE, + "model_file": model_path, + "window_size": WINDOW_SIZE, + "downsampling": 1, + "overlap": 0, + } + ) + ops.append(compute_onnx_op) + out_chunks.append( + cast( + RasterChunk, + compute_onnx_op.run(input_raster=cast(DataVibe, raster_seq), chunk=chunk)[ + "output_raster" + ], + ) + ) + + combine_chunks_op = OpTester(COMBINE_CHUNKS_YAML) + output_data = cast(Raster, combine_chunks_op.run(chunks=out_chunks)["raster"]) + output_array = np.squeeze( + rioxarray.open_rasterio(output_data.raster_asset.path_or_url).values # type: ignore + ) + + pred_torch = model.forward(torch.from_numpy(input_model[None, :, :, :].astype(np.float32))) + pred = np.squeeze(pred_torch.detach().numpy()) + + assert np.all(np.isclose(output_array, pred)) diff --git a/ops/compute_pixel_count/compute_pixel_count.py b/ops/compute_pixel_count/compute_pixel_count.py new file mode 100644 index 00000000..110fbe79 --- /dev/null +++ b/ops/compute_pixel_count/compute_pixel_count.py @@ -0,0 +1,69 @@ +import os +from tempfile import TemporaryDirectory +from typing import Any, Dict + 
+import numpy as np +import rasterio +from numpy.typing import NDArray +from rasterio.mask import mask +from shapely import geometry as shpg + +from vibe_core.data import Raster, RasterPixelCount, gen_guid +from vibe_core.data.core_types import AssetVibe, BaseGeometry + +UNIQUE_VALUES_COLUMN = "unique_values" +COUNTS_COLUMN = "counts" + + +def read_data(raster: Raster, geom: BaseGeometry) -> NDArray[Any]: + with rasterio.open(raster.raster_asset.path_or_url) as src: + raw_data, _ = mask( + src, + [geom], + crop=True, + filled=False, + ) + + # We are counting the number of pixels for all the raster bands; + # compressed() flattens the masked array and keeps only unmasked values + return raw_data.compressed() # type: ignore + + +def calculate_unique_values(data: NDArray[Any]) -> NDArray[Any]: + unique_values, counts = np.unique(data, return_counts=True) + return np.column_stack((unique_values, counts)) + + +class CallbackBuilder: + def __init__(self): + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def callback(raster: Raster) -> Dict[str, RasterPixelCount]: + data = read_data(raster, shpg.shape(raster.geometry)) + stack_data = calculate_unique_values(data) + guid = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") + + # Save the data to a CSV file (fmt="%d" writes both columns as integers) + np.savetxt( + filepath, + stack_data, + delimiter=",", + fmt="%d", + header=f"{UNIQUE_VALUES_COLUMN},{COUNTS_COLUMN}", + comments="", + ) + + raster_pixel_count = RasterPixelCount.clone_from( + raster, + id="pixel_count_" + raster.id, + assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], + ) + + return {"pixel_count": raster_pixel_count} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_pixel_count/compute_pixel_count.yaml b/ops/compute_pixel_count/compute_pixel_count.yaml new file mode 100644 index 00000000..82b290b3 --- /dev/null +++ b/ops/compute_pixel_count/compute_pixel_count.yaml @@ -0,0 +1,18 @@ +name: compute_pixels_count +inputs: + raster: Raster +output: + pixel_count: RasterPixelCount
+parameters: +entrypoint: + file: compute_pixel_count.py + callback_builder: CallbackBuilder +description: + short_description: Counts the pixel values in the input raster. + long_description: + Receives a raster and returns a RasterPixelCount which + stores an asset with the count of pixel values in the raster. + sources: + raster: Input raster. + sinks: + pixel_count: Counts of pixel values. diff --git a/ops/compute_pixel_count/test_compute_pixel_count.py b/ops/compute_pixel_count/test_compute_pixel_count.py new file mode 100644 index 00000000..138a00c0 --- /dev/null +++ b/ops/compute_pixel_count/test_compute_pixel_count.py @@ -0,0 +1,73 @@ +import os +from datetime import datetime +from typing import cast + +import numpy as np +import pandas as pd +import pytest +import shapely.geometry as shpg +import xarray as xr +from compute_pixel_count import COUNTS_COLUMN, UNIQUE_VALUES_COLUMN + +from vibe_core.data import Raster, RasterPixelCount +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.raster import save_raster_to_asset + +NBANDS = 3 +FAKE_RASTER_DATA = np.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]]).astype(np.float32) +CONFIG_PATH = os.path.join(os.path.dirname(__file__), "compute_pixel_count.yaml") + +EXPECTED_UNIQUE_VALUES = [0, 1, 2] +# We are using 3 bands, so we expect 9 counts for each unique value +EXPECTED_COUNTS = [9, 9, 9] + + +@pytest.fixture +def fake_raster(tmpdir: str): + x = 3 + y = 3 + + fake_data = FAKE_RASTER_DATA + fake_data = [fake_data] * NBANDS + + fake_da = xr.DataArray( + fake_data, + coords={"bands": np.arange(NBANDS), "x": np.linspace(0, 1, x), "y": np.linspace(0, 1, y)}, + dims=["bands", "y", "x"], + ) + + fake_da.rio.write_crs("epsg:4326", inplace=True) + asset = save_raster_to_asset(fake_da, tmpdir) + + return Raster( + id="fake_id", + time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)), + geometry=shpg.mapping(shpg.box(*fake_da.rio.bounds())), + assets=[asset], + bands={j: i for i, j in enumerate(["B1", "B2", "B3"])}, 
+ ) + + +def test_compute_pixel_count(fake_raster: Raster): + op = OpTester(CONFIG_PATH) + + output = op.run(raster=fake_raster) + assert output + assert "pixel_count" in output + + pixel_count = cast(RasterPixelCount, output["pixel_count"]) + assert len(pixel_count.assets) == 1 + + asset_path = pixel_count.assets[0].path_or_url + assert os.path.exists(asset_path) + + # Read the CSV file + df = pd.read_csv(asset_path) + + # Check the columns + assert UNIQUE_VALUES_COLUMN in df.columns # type: ignore + assert COUNTS_COLUMN in df.columns # type: ignore + + # Check the values + assert np.array_equal(df[UNIQUE_VALUES_COLUMN].values, EXPECTED_UNIQUE_VALUES) # type: ignore + assert np.array_equal(df[COUNTS_COLUMN].values, EXPECTED_COUNTS) # type: ignore diff --git a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py new file mode 100644 index 00000000..3ef7c532 --- /dev/null +++ b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py @@ -0,0 +1,149 @@ +import logging +from tempfile import TemporaryDirectory +from typing import Any, Dict, List + +import torch +import torch.nn.functional as F +from numpy.typing import NDArray + +from vibe_core.data import Raster, gen_guid +from vibe_lib.raster import ( + RGBA, + interpolated_cmap_from_colors, + json_to_asset, + load_raster, + load_raster_match, + save_raster_to_asset, +) + +CMAP_INTERVALS: List[float] = [0.0, 4000.0] + +CMAP_COLORS: List[RGBA] = [ + RGBA(0, 0, 0, 255), + RGBA(255, 255, 255, 255), +] + +LOGGER = logging.getLogger(__name__) + + +def run_average_elevation( + dem: NDArray[Any], cdl: NDArray[Any], window_size: int = 41 +) -> NDArray[Any]: + kernel = torch.ones((1, 1, window_size, window_size)) + padding = (window_size - 1) // 2 + eps = 1e-9 + + dem_torch = torch.from_numpy(dem).to(kernel) + cdl_torch = torch.from_numpy(cdl).to(kernel) + + # Downscale + downscale = 4 + 
dem_torch = F.interpolate( + dem_torch.unsqueeze(0), + (dem_torch.shape[1] // downscale, dem_torch.shape[2] // downscale), + mode="bilinear", + ).squeeze(0) + + cdl_torch = F.interpolate( + cdl_torch.unsqueeze(0), + (cdl_torch.shape[1] // downscale, cdl_torch.shape[2] // downscale), + mode="nearest", + ).squeeze(0) + + # Output buffer, filled label by label in the loop further down + cdl_elevation = torch.zeros_like(dem_torch).to(kernel) + + mean_elev = F.conv2d( + F.pad( + dem_torch.unsqueeze(0).to(kernel), + (padding, padding, padding, padding), + mode="replicate", + ), + kernel, + bias=None, + stride=1, + padding=0, + ).squeeze(0) / (window_size**2) + + std_elev = F.conv2d( + F.pad( + (dem_torch - mean_elev).unsqueeze(0).to(kernel) ** 2, + (padding, padding, padding, padding), + mode="replicate", + ), + kernel, + bias=None, + stride=1, + padding=0, + ).squeeze(0) / (window_size**2 - 1) + + # Z-scores wrt window statistics. NOTE(review): std_elev has no sqrt (variance?) - confirm + z_elevation = (dem_torch - mean_elev) / (std_elev + eps) + + # Average the z-scores per class label over overlapping windows + unique_cdl_labels = torch.unique(cdl_torch) + for i in unique_cdl_labels: + label_mask = cdl_torch == i + masked_elev = z_elevation * label_mask + elev_sum = F.conv2d( + masked_elev.unsqueeze(0), kernel, bias=None, stride=1, padding=padding + ).squeeze(0) + label_count = F.conv2d( + label_mask.unsqueeze(0).to(kernel), kernel, bias=None, stride=1, padding=padding + ).squeeze(0) + cdl_elevation[label_mask] = elev_sum[label_mask] / label_count[label_mask] + + # Upsample to original resolution + cdl_elevation = F.interpolate( + cdl_elevation.unsqueeze(0), (dem.shape[1], dem.shape[2]), mode="bilinear" + ).squeeze(0) + + return cdl_elevation.numpy() + + +class CallbackBuilder: + def __init__( + self, + window_size: int, + ): + self.tmp_dir = TemporaryDirectory() + self.window_size = window_size + + def __call__(self): + def operator_callback( + input_dem_raster: Raster, input_cluster_raster: Raster + ) -> Dict[str, Raster]: + dem_da =
load_raster_match( + input_dem_raster, + match_raster=input_cluster_raster, + ) + cluster_da = load_raster(input_cluster_raster, use_geometry=True) + + average_elevation_da: NDArray[Any] = run_average_elevation( + dem_da.to_numpy(), cluster_da.to_numpy(), self.window_size + ) + + vis_dict: Dict[str, Any] = { + "bands": [0], + "colormap": interpolated_cmap_from_colors(CMAP_COLORS, CMAP_INTERVALS), + "range": (0, 4000), + } + + asset = save_raster_to_asset( + dem_da[:1].copy(data=average_elevation_da), self.tmp_dir.name + ) + out_raster = Raster.clone_from( + src=input_dem_raster, + id=gen_guid(), + assets=[ + asset, + json_to_asset(vis_dict, self.tmp_dir.name), + ], + ) + + return {"output_raster": out_raster} + + return operator_callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml new file mode 100644 index 00000000..8edfb18d --- /dev/null +++ b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.yaml @@ -0,0 +1,17 @@ +name: compute_raster_class_windowed_average +inputs: + input_dem_raster: Raster + input_cluster_raster: Raster +output: + output_raster: Raster +parameters: + window_size: 41 +dependencies: + parameters: + - window_size +entrypoint: + file: compute_raster_class_windowed_average.py + callback_builder: CallbackBuilder +description: + short_description: + Computes average elevation per-class in overlapping windows, combining cluster and elevation tiles. 
class CallbackBuilder:
    """Builds the op callback that clusters a raster with overlap clustering."""

    def __init__(
        self,
        clustering_method: str,
        number_classes: int,
        half_side_length: int,
        number_iterations: int,
        stride: int,
        warmup_steps: int,
        warmup_half_side_length: int,
        window: int,
    ):
        # Output assets are written here; removed when the builder is deleted.
        self.tmp_dir = TemporaryDirectory()
        # Stored but not read by the callback below.
        self.clustering_method = clustering_method
        self.number_classes = number_classes
        self.half_side_length = half_side_length
        self.number_iterations = number_iterations
        self.stride = stride
        self.warmup_steps = warmup_steps
        self.warmup_half_side_length = warmup_half_side_length
        self.window = window

    def __call__(self):
        def operator_callback(input_raster: Raster) -> Dict[str, Raster]:
            """Cluster ``input_raster`` and return a single-band CategoricalRaster."""
            out_dir = self.tmp_dir.name
            n_classes = self.number_classes

            src_xa = load_raster(input_raster, use_geometry=True)
            pixels: NDArray[Any] = src_xa.to_numpy()
            if src_xa.dtype == "uint8":
                # overlap clustering requires a float numpy array
                pixels = pixels / float(INT8_MAX_VALUE)

            cluster_map: NDArray[Any] = overlap_clustering.run_clustering(
                pixels,
                number_classes=n_classes,
                half_side_length=self.half_side_length,
                number_iterations=self.number_iterations,
                stride=self.stride,
                warmup_steps=self.warmup_steps,
                warmup_half_side_length=self.warmup_half_side_length,
                window=self.window,
            )

            # The first band of the source is reused as the container.
            raster_asset = save_raster_to_asset(src_xa[0].copy(data=cluster_map), out_dir)

            vis_dict: Dict[str, Any] = {
                "bands": [0],
                "colormap": get_categorical_cmap("tab10", n_classes),
                "range": (0, n_classes - 1),
            }
            vis_asset = json_to_asset(vis_dict, out_dir)

            category_names = [f"cluster{i}" for i in range(n_classes)]
            return {
                "output_raster": CategoricalRaster(
                    id=gen_guid(),
                    geometry=input_raster.geometry,
                    time_range=input_raster.time_range,
                    assets=[raster_asset, vis_asset],
                    bands={"cluster": 0},
                    categories=category_names,
                )
            }

        return operator_callback

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the op callback that computes a Sobel gradient for every band."""

    def __init__(self):
        # Output assets are written here; removed when the builder is deleted.
        self.tmp_dir = TemporaryDirectory()

    @staticmethod
    def _band_positions(band_mapping: Dict[str, int]) -> Dict[int, int]:
        """Map each distinct source band index to its position in the output stack.

        Keys are produced in ascending band-index order, so iterating the result
        yields the bands in file order regardless of how ``band_mapping`` is
        ordered, and names that alias the same index share one output layer.
        """
        return {
            src_index: position
            for position, src_index in enumerate(sorted(set(band_mapping.values())))
        }

    def __call__(self):
        def operator_callback(input_raster: Raster) -> Dict[str, Raster]:
            """Write a gradient raster for ``input_raster`` and wrap it as a Raster.

            Returns:
                ``{"output_raster": Raster}`` whose band names are the input band
                names suffixed with ``_gradient``.
            """
            input_band_mapping = input_raster.bands
            positions = self._band_positions(input_band_mapping)
            uid = gen_guid()

            # The asset id doubles as the file name.
            out_path = os.path.join(self.tmp_dir.name, f"{uid}.tif")

            # Open the original raster and go through the layers computing the gradient.
            # Bands are visited in ascending index order; positional inserts
            # driven by the mapping's iteration order could misplace layers.
            with rasterio.open(input_raster.raster_asset.url) as src:
                out_meta = src.meta
                output_bands = [
                    # rasterio band numbers are 1-based.
                    compute_sobel_gradient(src.read(src_index + 1))
                    for src_index in positions
                ]

            # The profile must describe exactly the layers that are written.
            out_meta.update(count=len(output_bands))

            # Create a new raster to save the gradient layers.
            with rasterio.open(out_path, "w", **out_meta) as dst:
                dst.write(np.stack(output_bands, axis=0))

            # Update output bands name, pointing at the layer actually written.
            output_band_mapping = {
                f"{name}_gradient": positions[src_index]
                for name, src_index in input_band_mapping.items()
            }

            vis_dict: Dict[str, Any] = {
                "bands": [0],
                "colormap": interpolated_cmap_from_colors(
                    GRADIENT_CMAP_COLORS, GRADIENT_CMAP_INTERVALS
                ),
                "range": (0, 200),
            }

            asset = AssetVibe(reference=out_path, type=mimetypes.types_map[".tif"], id=uid)
            include_raster_overviews(asset.local_path)
            out_raster = Raster.clone_from(
                input_raster,
                id=gen_hash_id(
                    f"{input_raster.id}_compute_raster_gradient",
                    input_raster.geometry,
                    input_raster.time_range,
                ),
                assets=[asset, json_to_asset(vis_dict, self.tmp_dir.name)],
                bands=output_band_mapping,
            )

            return {"output_raster": out_raster}

        return operator_callback

    def __del__(self):
        self.tmp_dir.cleanup()
def pre_process(scale: float):
    """Return a chip pre-processing function that multiplies the data by ``scale``."""

    def fun(chip_data: NDArray[Any], _):
        # The second positional argument is accepted and ignored.
        scaled = chip_data * scale
        return scaled

    return fun


def post_process(
    chip_data: NDArray[Any], chip_mask: NDArray[Any], model_out: NDArray[Any]
) -> NDArray[Any]:
    """Turn model logits into probabilities, forcing masked pixels to 1.

    A pixel is treated as nodata when ``chip_mask`` is set in any entry along
    axis 1; such pixels get probability 1. ``chip_data`` is not used.
    """
    probabilities = 1 / (1 + np.exp(-model_out))  # logistic sigmoid
    is_nodata = chip_mask.any(axis=1, keepdims=True)
    probabilities[is_nodata] = 1
    return probabilities


class CallbackBuilder:
    """Builds the op callback that predicts shadow probability for Sentinel-2 L2A."""

    def __init__(
        self,
        downsampling: int,
        root_dir: str,
        model_path: str,
        window_size: int,
        overlap: float,
        batch_size: int,
        num_workers: int,
        in_memory: bool,
    ):
        self.downsampling = downsampling
        self.root_dir = root_dir
        self.model_path = model_path
        self.window_size = window_size
        self.overlap = overlap
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.in_memory = in_memory
        # Predictions are written here; removed when the builder is deleted.
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def compute_shadow_prob(
            sentinel_raster: Sentinel2Raster,
        ) -> Dict[str, Sentinel2CloudProbability]:
            """Run the ONNX model over chips of ``sentinel_raster``.

            Raises:
                ValueError: If the configured downsampling is smaller than 1.
            """
            verify_processing_level((sentinel_raster,), S2ProcessingLevel.L2A, "FPN Shadow model")

            if self.downsampling < 1:
                raise ValueError(
                    f"Downsampling must be equal or larger than 1, found {self.downsampling}"
                )

            session = ort.InferenceSession(os.path.join(self.root_dir, self.model_path))

            side = self.window_size
            # Distance between consecutive chips; the overlap fraction shrinks it.
            stride = int(side * (1 - self.overlap))
            reader = InMemoryReader(self.downsampling) if self.in_memory else None
            dataset = ChipDataset(
                [sentinel_raster],
                chip_size=Dims(side, side, 1),
                step_size=Dims(stride, stride, 1),
                downsampling=self.downsampling,
                nodata=DEFAULT_NODATA,
                reader=reader,
            )

            # No worker processes are used when the raster is read in memory.
            workers = 0 if self.in_memory else self.num_workers
            dataloader = get_loader(dataset, self.batch_size, workers)

            pred_filepaths = predict_chips(
                session,
                dataloader,
                self.tmp_dir.name,
                skip_nodata=True,
                pre_process=pre_process(sentinel_raster.scale),
                post_process=post_process,
            )
            assert (
                len(pred_filepaths) == 1
            ), f"Expected one prediction file, found: {len(pred_filepaths)}"

            # Bring the prediction back to the dataset's size and transform.
            mask_filepath = resample_raster(
                pred_filepaths[0],
                self.tmp_dir.name,
                dataset.width,
                dataset.height,
                dataset.transform,
                Resampling.bilinear,
            )
            asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid())

            return {
                "shadow_probability": Sentinel2CloudProbability.clone_from(
                    sentinel_raster, id=gen_guid(), assets=[asset]
                )
            }

        return compute_shadow_prob

    def __del__(self):
        self.tmp_dir.cleanup()
def get_proper_order(seq: Union[List[Raster], RasterSequence]) -> List[Raster]:
    """Return the rasters of ``seq`` as an ordered list.

    A ``RasterSequence`` yields one cloned ``Raster`` per asset, in the order
    given by ``get_ordered_assets``; a plain list is sorted by start date.
    """
    if not isinstance(seq, RasterSequence):
        return sorted(seq, key=lambda raster: raster.time_range[0])
    return [
        Raster.clone_from(seq, gen_guid(), assets=[asset])  # type: ignore
        for asset in seq.get_ordered_assets()
    ]


def get_timerange(list1: List[Raster], list2: List[Raster]) -> Tuple[datetime, datetime]:
    """Return the earliest and latest date found in the time ranges of both lists."""
    dates = sorted(date for raster in [*list1, *list2] for date in raster.time_range)
    return dates[0], dates[-1]


def get_geom(list1: List[Raster], list2: List[Raster]) -> Dict[str, Any]:
    """Return the union of the geometries of all rasters, as a mapping."""
    shapes = [shpg.shape(raster.geometry) for raster in [*list1, *list2]]
    return shpg.mapping(shpo.unary_union(shapes))


class CallbackBuilder:
    """Builds the op callback that merges two raster collections into a sequence."""

    def __call__(self):
        def create_raster_sequence(
            rasters1: Union[List[Raster], RasterSequence],
            rasters2: Union[List[Raster], RasterSequence],
        ) -> Dict[str, RasterSequence]:
            """Combine both inputs into one sequence: ``rasters1`` items first."""
            first = get_proper_order(rasters1)
            second = get_proper_order(rasters2)

            sequence = RasterSequence(
                gen_guid(),
                time_range=get_timerange(first, second),
                geometry=get_geom(first, second),
                assets=[],
                bands={},
            )
            for raster in [*first, *second]:
                sequence.add_item(raster)
            return {"sequence": sequence}

        return create_raster_sequence
def datavibe_filter(input_item: DataVibe, filter_out: str) -> Dict[str, DataVibe]:
    """Clone ``input_item`` with its geometry and/or time range replaced by dummies.

    Args:
        input_item: Item to clone; the clone carries no assets.
        filter_out: ``"geometry"``, ``"time_range"`` or ``"all"``; selects which
            fields are replaced by fixed placeholder values.

    Returns:
        ``{"output_item": DataVibe}`` whose id is hashed from the resulting
        geometry and time range.
    """
    if filter_out in ("all", "geometry"):
        placeholder: Polygon = box(0.0, -90.0, 360.0, 90.0, ccw=True)
        geometry = mapping(placeholder)  # dummy geometry
    else:
        geometry = input_item.geometry

    if filter_out in ("all", "time_range"):
        dummy_date = datetime(2022, 1, 1)
        time_range = (dummy_date, dummy_date)  # dummy dates
    else:
        time_range = input_item.time_range

    item_id = gen_hash_id("datavibe_filter", geometry=geometry, time_range=time_range)
    filtered = DataVibe.clone_from(
        input_item,
        id=item_id,
        geometry=geometry,
        time_range=time_range,
        assets=[],
    )
    return {"output_item": filtered}


def callback_builder(filter_out: str):
    """Validate ``filter_out`` and return the op callback bound to it.

    Raises:
        ValueError: If ``filter_out`` is not one of the supported options.
    """
    filter_out_options = ["all", "time_range", "geometry"]
    if filter_out in filter_out_options:
        return partial(datavibe_filter, filter_out=filter_out)
    raise ValueError(
        f"Invalid filter_out parameter: {filter_out}. "
        f"Valid values are: {', '.join(filter_out_options)}"
    )
def can_park(mask: NDArray[Any], car_size: Tuple[int, int], n_kernels: int, thr: float):
    """Tell whether a car-sized footprint fits inside ``mask`` at some orientation.

    ``mask`` is convolved with each kernel returned by ``get_kernels`` (defined
    elsewhere in this module); the function returns True as soon as, for some
    kernel and position, the covered fraction of the kernel reaches ``thr``.
    """
    mask = mask.astype(np.float32)
    kernels = get_kernels(car_size, n_kernels)
    for kernel in kernels:
        ks = kernel.sum()
        # Fraction of the kernel's pixels that fall on the mask, at every position.
        if np.any(convolve(mask, kernel, mode="constant") / ks >= thr):
            return True
    return False


class DrivewayDetector:
    """Finds a driveway polygon inside a property from imagery and a segmentation."""

    def __init__(
        self,
        img_filepath: str,
        pred_filepath: str,
        road_df: gpd.GeoDataFrame,
        min_region_area: float,
        ndvi_thr: float,
        car_size: Tuple[int, int],
        num_kernels: int,
        car_thr: float,
    ) -> None:
        """Store the parameters and read the image's CRS, bounds and pixel size.

        ``min_region_area`` is divided by the pixel area of the image, so
        ``self.min_area`` is expressed in pixels. ``road_df`` is reprojected to
        the image CRS.
        """
        self.img_filepath = img_filepath
        self.pred_filepath = pred_filepath

        with rasterio.open(img_filepath) as src:
            pixel_area = src.res[0] * src.res[1]
            self.raster_geom = shpg.box(*src.bounds)
            self.raster_crs = src.crs
            self.min_area = min_region_area / pixel_area

        self.road_df = cast(gpd.GeoDataFrame, road_df.to_crs(self.raster_crs))

        self.ndvi_thr = ndvi_thr
        self.car_size = car_size
        self.num_kernels = num_kernels
        self.car_thr = car_thr

    def _get_region_near_road(
        self, pred_mask: MaskedArrayType, tr: Affine
    ) -> Optional[NDArray[np.bool_]]:
        """Return the mask of the large-enough predicted region closest to a road.

        Returns None when no connected region exceeds ``self.min_area``.
        """
        pred_labels = label(pred_mask.filled(0))
        pred_regions = sorted(
            [p for p in regionprops(pred_labels) if p.area > self.min_area],
            # The centroid is (row, col); it is reversed before applying the
            # affine transform to obtain map coordinates.
            key=lambda x: self.road_df.geometry.distance(shpg.Point(tr * x.centroid[::-1])).min(),
        )
        if not pred_regions:
            # No region that is large enough
            return None

        region = pred_regions[0]  # Get region closest to the road
        mask = pred_labels == region.label
        return mask

    def detect(self, geom: BaseGeometry) -> Optional[BaseGeometry]:
        """Detect the driveway inside ``geom``.

        Returns the union of the convex hulls of the regions where a car fits,
        or None when no candidate region is found.
        """
        bands, tr = read_raster(self.img_filepath, geom)
        pred_mask = read_raster(self.pred_filepath, geom)[0][0] > 0

        # NOTE(review): bands 0 and 3 are taken as red and NIR — confirm against
        # the band layout of the input imagery.
        red, nir = bands[[0, 3]]
        # NOTE(review): nir + red == 0 is not guarded here — confirm how such
        # pixels should be treated.
        ndvi = (nir - red) / (nir + red)
        not_green = (ndvi < self.ndvi_thr).filled(0)

        region_mask = self._get_region_near_road(pred_mask, tr)
        if region_mask is None:
            # No region large enough
            return None

        # Keep only the non-vegetated pixels of the selected region.
        region_mask = not_green * region_mask
        region_labels = label(region_mask)

        # Find regions where we could fit a car
        dw_regions = [
            p
            for p in regionprops(region_labels)
            if can_park(p.image, self.car_size, self.num_kernels, self.car_thr)
        ]
        if not dw_regions:
            # No region that can fit a car
            return None
        # Estimate total region of the driveway
        dw_mask = np.sum([region_labels == p.label for p in dw_regions], axis=0).astype(bool)
        dw_geom = shpo.unary_union(
            [
                shpg.shape(s).convex_hull
                for s, _ in shapes(
                    dw_mask.astype(np.uint8), mask=dw_mask, connectivity=8, transform=tr
                )
            ]
        )
        return dw_geom


class CallbackBuilder:
    """Builds the op callback that detects driveways for a set of properties."""

    def __init__(
        self,
        min_region_area: float,
        ndvi_thr: float,
        car_size: Tuple[int, int],
        num_kernels: int,
        car_thr: float,
    ):
        self.min_region_area = min_region_area
        self.ndvi_thr = ndvi_thr
        self.car_size = car_size
        self.num_kernels = num_kernels
        self.car_thr = car_thr
        # GeoJSON outputs are written here; removed when the builder is deleted.
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def callback(
            input_raster: Raster,
            segmentation_raster: CategoricalRaster,
            property_boundaries: GeometryCollection,
            roads: GeometryCollection,
        ) -> Dict[str, DataVibe]:
            """Run the detector on every property that intersects the raster.

            Returns:
                ``properties_with_driveways``: the properties where a driveway
                was found; ``driveways``: the same rows with the geometry
                replaced by the detected driveway.
            """
            road_df = cast(gpd.GeoDataFrame, gpd.read_file(roads.assets[0].url))
            detector = DrivewayDetector(
                input_raster.raster_asset.url,
                segmentation_raster.raster_asset.url,
                road_df=road_df,
                min_region_area=self.min_region_area,
                ndvi_thr=self.ndvi_thr,
                car_size=self.car_size,
                num_kernels=self.num_kernels,
                car_thr=self.car_thr,
            )
            properties_df = cast(
                gpd.GeoDataFrame,
                gpd.read_file(property_boundaries.assets[0].url).to_crs(detector.raster_crs),  # type: ignore
            )
            properties_df = properties_df[properties_df.intersects(detector.raster_geom)]
            driveway = []
            dw_geoms = []
            # NOTE(review): this only checks for None; an empty selection passes
            # and yields empty outputs — confirm whether that is intended.
            assert properties_df is not None, "There are no intersections with properties"
            for _, row in properties_df.iterrows():
                geom = row.geometry.buffer(0)
                dw_geom = detector.detect(geom)
                is_dw = dw_geom is not None
                # One flag per property, used below as a boolean row selector.
                driveway.append(is_dw)
                if is_dw:
                    dw_geoms.append(dw_geom)  # type: ignore
            full_df = properties_df[driveway].copy()  # type: ignore
            dw_df = full_df.copy()
            dw_df["geometry"] = dw_geoms  # type: ignore
            out = {}
            for out_name, df in zip(("properties_with_driveways", "driveways"), (full_df, dw_df)):
                asset_id = gen_guid()
                filepath = os.path.join(self.tmp_dir.name, f"{asset_id}.geojson")
                df.to_file(filepath, driver="GeoJSON")  # type: ignore
                asset = AssetVibe(reference=filepath, type="application/geo+json", id=asset_id)
                out[out_name] = DataVibe.clone_from(input_raster, id=gen_guid(), assets=[asset])

            return out

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
def compute_outliers(
    curves: NDArray[Any], preprocessing: StandardScaler, thr: float, max_components: int
) -> Tuple[NDArray[np.int32], NDArray[np.float32], NDArray[np.int32], NDArray[Any]]:
    """Fit a Gaussian mixture to ``curves`` and flag low-likelihood samples.

    Args:
        curves: One row per sample (pixel), one column per date.
        preprocessing: Scaler fitted on ``curves`` before training.
        thr: Samples whose log-likelihood is below this value are outliers.
        max_components: Upper bound for the component search.

    Returns:
        Tuple of (component labels, log-likelihood, outlier flags as 0/1,
        mixture means transformed back to the input space).
    """
    x = preprocessing.fit_transform(curves)  # Preprocess data

    mix = train_mixture_with_component_search(x, max_components=max_components)
    labels = cluster_data(x, mix)  # Assign labels
    labels = labels.astype(np.int32)
    # TODO: How to compute the threshold? Use fixed for now
    likelihood = mixture_log_likelihood(x, mix)
    outliers = likelihood < thr
    likelihood = likelihood.astype(np.float32)
    outliers = cast(NDArray[np.int32], outliers.astype(np.int32))
    # Recover means in the NDVI space
    mix_means = cast(NDArray[Any], preprocessing.inverse_transform(mix.means_))

    return labels, likelihood, outliers, mix_means


def save_mixture_means(
    mix_means: NDArray[Any],
    output_dir: str,
    geom: Dict[str, Any],
    date_list: Sequence[datetime],
) -> TimeSeries:
    """Store the mixture means as a time series with one column per component.

    ``date_list`` must be in the same order as the columns of the curves the
    mixture was trained on.
    """
    # Save timeseries output
    df = pd.DataFrame(date_list, columns=["date"])
    for i, m in enumerate(mix_means):
        df[f"component{i}"] = m

    df.set_index("date", drop=True, inplace=True)

    return TimeSeries(
        id=gen_guid(),
        geometry=geom,
        time_range=(date_list[0], date_list[-1]),
        assets=[save_timeseries_to_asset(df, output_dir)],
    )


def unpack_data(rasters: Sequence[Raster]) -> Tuple[NDArray[np.float32], xr.DataArray]:
    """Load the rasters and stack their valid pixels into per-pixel curves.

    Returns:
        ``curves`` with one row per unmasked pixel and one column per raster,
        ordered by start date, and the data of the earliest raster, which is
        used as the spatial template.
    """
    # Sort rasters according to date
    rasters = sorted(rasters, key=lambda x: x.time_range[0])
    # Load one raster to get metadata we need
    band_data = load_raster(rasters[0], use_geometry=True)

    # Get band data and compress masked data into a stack of timeseries
    curves = (
        np.stack(
            [band_data.to_masked_array().compressed()]
            + [
                load_raster(r, use_geometry=True).to_masked_array().compressed()
                for r in rasters[1:]
            ]
        )
        .astype(np.float32)
        .T
    )
    return curves, band_data


def pack_rasters(
    labels: NDArray[np.int32],
    likelihood: NDArray[np.float32],
    outliers: NDArray[np.int32],
    geom: Dict[str, Any],
    date_list: Sequence[datetime],
    threshold: float,
    output_dir: str,
    reshape_fun: Callable[[NDArray[Any]], xr.DataArray],
):
    """Save likelihood, labels and outlier flags as rasters.

    Returns:
        Dict with the keys ``heatmap``, ``segmentation`` and ``outliers``, each
        holding a one-element list.
    """
    output: Dict[str, List[Any]] = {}
    time_range = (date_list[0], date_list[-1])

    # Save likelihood raster
    vis_dict = {
        "bands": [0],
        "colormap": get_cmap("viridis"),
        # The visualization range never starts below the outlier threshold.
        "range": (max(threshold, float(likelihood.min())), float(likelihood.max())),
    }
    heatmap = Raster(
        id=gen_guid(),
        geometry=geom,
        time_range=time_range,
        assets=[
            save_raster_to_asset(reshape_fun(likelihood), output_dir),
            json_to_asset(vis_dict, output_dir),
        ],
        bands={"likelihood": 0},
    )
    output["heatmap"] = [heatmap]

    # Save categorical rasters
    classes = np.unique(labels)
    num_classes = classes.shape[0]
    vis_dict = {
        "bands": [0],
        "colormap": get_categorical_cmap("tab10", num_classes),
        "range": (0, num_classes - 1),
    }
    output["segmentation"] = [
        CategoricalRaster(
            id=gen_guid(),
            geometry=geom,
            time_range=time_range,
            assets=[
                save_raster_to_asset(reshape_fun(labels), output_dir),
                json_to_asset(vis_dict, output_dir),
            ],
            bands={"labels": 0},
            categories=[f"component{i}" for i in range(num_classes)],
        )
    ]
    vis_dict = {
        "bands": [0],
        "colormap": get_categorical_cmap("tab10", 2),
        "range": (0, 1),
    }
    output["outliers"] = [
        CategoricalRaster(
            id=gen_guid(),
            geometry=geom,
            time_range=time_range,
            assets=[
                save_raster_to_asset(reshape_fun(outliers), output_dir),
                json_to_asset(vis_dict, output_dir),
            ],
            bands={"labels": 0},
            categories=["normal", "outlier"],
        )
    ]
    return output


def pack_data(
    labels: NDArray[np.int32],
    likelihood: NDArray[np.float32],
    outliers: NDArray[np.int32],
    mix_means: NDArray[np.float32],
    geom: Dict[str, Any],
    date_list: Sequence[datetime],
    threshold: float,
    output_dir: str,
    reshape_fun: Callable[[NDArray[Any]], xr.DataArray],
):
    """Build the full op output: the three rasters plus the mixture-means series."""
    output = pack_rasters(
        labels, likelihood, outliers, geom, date_list, threshold, output_dir, reshape_fun
    )
    output["mixture_means"] = [save_mixture_means(mix_means, output_dir, geom, date_list)]
    return output


class CallbackBuilder:
    """Builds the op callback that detects outliers over a stack of rasters."""

    def __init__(self, threshold: float):
        # Output assets are written here; removed when the builder is deleted.
        self.tmp_dir = TemporaryDirectory()
        self.threshold = threshold
        # TODO: Customize preprocessing
        self.preprocessing = StandardScaler()

    def __call__(self):
        def outliers_callback(rasters: List[Raster]) -> Dict[str, List[Union[Raster, TimeSeries]]]:
            """Fit the mixture model over ``rasters`` and pack every output."""
            curves, band_data = unpack_data(rasters)

            # Get metadata
            geom = rasters[0].geometry
            # unpack_data stacks the curves in ascending start-date order, so the
            # dates must follow the same order; otherwise the output time range
            # and the mixture-means index are wrong for unsorted input.
            date_list = sorted(r.time_range[0] for r in rasters)

            # Helper function to obtain masked array from 1D array
            def reshape_to_geom(values: NDArray[Any]) -> xr.DataArray:
                data = np.ma.masked_all(band_data.shape, values.dtype)
                data.mask = band_data.isnull()
                # Scatter the 1D values back to the valid pixel positions.
                data.data[~data.mask] = values
                data.fill_value = band_data.rio.encoded_nodata  # Unused value
                data = band_data.copy(data=data.filled())
                data.rio.update_encoding({"dtype": str(values.dtype)}, inplace=True)
                return data

            # Gaussian mixtures modeling
            labels, likelihood, outliers, mix_means = compute_outliers(
                curves,
                self.preprocessing,
                self.threshold,
                max_components=1,  # Assume only one component
            )

            # Pack data
            output = pack_data(
                labels,
                likelihood,
                outliers,
                mix_means,
                geom,
                date_list,
                self.threshold,
                self.tmp_dir.name,
                reshape_to_geom,
            )

            return output

        return outliers_callback

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the `download_airbus` op callback.

    For every requested product the callback reuses an already-owned image when
    one overlaps the requested geometry well enough, and otherwise orders the
    product before downloading it.
    """

    def __init__(
        self,
        api_key: str,
        projected_crs: bool,
        iou_threshold: float,
        delay: float,
        timeout: float,
    ):
        """
        Args:
            api_key: key forwarded to `AirBusAPI`.
            projected_crs: flag forwarded to `AirBusAPI`.
            iou_threshold: minimum `norm_intersection` between the requested
                geometry and an owned image for that image to be reused.
            delay: forwarded to `AirBusAPI`.
            timeout: forwarded to `AirBusAPI`.
        """
        self.api_key = api_key
        self.projected_crs = projected_crs
        self.iou_thr = iou_threshold
        self.delay = delay
        self.timeout = timeout
        # Scratch directory for downloaded imagery; removed in __del__
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        """Return the op callback (a closure over this builder's state)."""

        def download_product(api: AirBusAPI, product: AirbusProduct) -> AirbusRaster:
            geom = shpg.shape(product.geometry)

            def overlap(candidate: Dict[str, Any]) -> float:
                return norm_intersection(geom, shpg.shape(candidate["geometry"]))

            # Only the best-overlapping owned image matters, so pick it directly
            # instead of sorting the whole list (ties resolve to the first
            # candidate, as with the previous stable reverse sort)
            best = max(api.query_owned(geom, product.acquisition_id), key=overlap, default=None)
            if best is None or overlap(best) < self.iou_thr:
                # We need to purchase the product
                # We choose the envelope to avoid having images with a lot of nodata in the library
                order = api.place_order([product.extra_info["id"]], geom.envelope)
                order = api.block_until_order_delivered(order["id"])
                product_id = re.findall(
                    r"items/(.*)/", order["deliveries"][0]["_links"]["download"]["href"]
                )[0]
                best = api.get_product_by_id(product_id)
            else:
                product_id = best["id"]
            best["filepath"] = api.download_product(product_id, self.tmp_dir.name)
            return convert_product(best, self.tmp_dir.name)

        def download_products(
            airbus_products: List[AirbusProduct],
        ) -> Dict[str, List[AirbusRaster]]:
            # A single API client is shared by all products of one invocation
            api = AirBusAPI(
                self.api_key,
                self.projected_crs,
                list(Constellation),
                self.delay,
                self.timeout,
            )
            return {"downloaded_products": [download_product(api, p) for p in airbus_products]}

        return download_products

    def __del__(self):
        # Remove the scratch directory explicitly instead of relying on the
        # TemporaryDirectory finalizer
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the `download_alos` op callback.

    The callback downloads the ALOS forest/non-forest item matching the input
    product and returns it as a `CategoricalRaster`.
    """

    def __init__(self, pc_key: str):
        """
        Args:
            pc_key: Planetary Computer subscription key; set globally on the
                `planetary_computer` module.
        """
        # Scratch directory for downloaded assets; removed in __del__
        self.tmp_dir = TemporaryDirectory()
        pc.set_subscription_key(pc_key)

    def __call__(self):
        """Return the op callback (a closure over this builder's state)."""

        def callback(product: AlosProduct) -> Dict[str, CategoricalRaster]:
            collection = AlosForestCollection()
            item = collection.query_by_id(product.id)
            if not item:
                raise RuntimeError(f"Product {product.id} not found in ALOS Forest collection")
            # Each product gets its own sub-directory, named after its id
            assets = collection.download_item(item, os.path.join(self.tmp_dir.name, product.id))
            if not assets:
                raise RuntimeError(f"No assets found for product {product.id}")
            assets = [AssetVibe(reference=a, type="image/tiff", id=gen_guid()) for a in assets]
            return {
                "raster": CategoricalRaster.clone_from(
                    product,
                    id=gen_hash_id(
                        f"{product.id}_download_alos_product",
                        product.geometry,
                        product.time_range,
                    ),
                    assets=assets,
                    bands={"forest_non_forest": 0},
                    categories=AlosForestCollection.categories,
                )
            }

        return callback

    def __del__(self):
        # Remove the scratch directory explicitly instead of relying on the
        # TemporaryDirectory finalizer
        self.tmp_dir.cleanup()
def fake_items():
    """Build the fake STAC item returned by the mocked `query_by_id`.

    The item carries a single TIFF asset keyed by the item id, which is what the
    mocked `download_item` in this test module looks up.
    """

    def to_utc_timestamp(dt: datetime) -> str:
        # FAKE_TIME_RANGE is timezone-aware, so isoformat() already ends in
        # "+00:00"; appending "Z" to that yields an invalid timestamp
        return dt.isoformat().replace("+00:00", "Z")

    assets = {"N15W087_20_FNF": Asset(href="fake_href", media_type="image/tiff")}
    return Item(
        id="N15W087_20_FNF",
        geometry=None,
        bbox=None,
        datetime=None,
        properties={
            "start_datetime": to_utc_timestamp(FAKE_TIME_RANGE[0]),
            "end_datetime": to_utc_timestamp(FAKE_TIME_RANGE[1]),
        },
        assets=assets,
    )
def get_weather(
    user_input: DataVibe,
    output_dir: str,
    api_key: str,
    app_key: str,
    limit: int,
    feed_interval: int,
) -> WeatherVibe:
    """Gets the Ambient Weather Station data at the location and time specified

    The data is written to `weather.csv` inside `output_dir` and referenced by the
    returned `WeatherVibe`.

    Args:
        user_input: Specifies location and time for data query
        output_dir: directory in which to save data
        api_key: API key used to access Ambient Weather Station API
        app_key: App key used to access Ambient Weather Station API
        limit: Number of data points to be downloaded from ambient service
        feed_interval: interval, in minutes, used to convert the queried time
            range into a number of data points
    Returns:
        Weather data at specified location and time
    Raises:
        RuntimeError: if API service, devices, or data is unreachable
    """
    api = AmbientAPI(
        AMBIENT_ENDPOINT=ENDPOINT,
        AMBIENT_API_KEY=api_key,
        AMBIENT_APPLICATION_KEY=app_key,
    )

    devices = call_ambient_api(api.get_devices)
    assert devices is not None, "No devices found"
    device = get_device(devices, user_input.geometry)

    # create a closure to simplify retries
    # NOTE: `end_date` and `delta` are read when the closure is called, so it
    # always sees the values assigned below / updated by the chunking loop
    def get_data() -> List[Dict[str, Any]]:
        out = device.get_data(end_date=end_date, limit=delta)
        assert out is not None, "No data found"
        return out

    start_date = user_input.time_range[0]
    end_date = user_input.time_range[1]

    # `delta` starts as a timedelta and is then rebound to the number of data
    # points (of `feed_interval` minutes each) covered by the time range
    delta = end_date - start_date
    delta_sec = (delta.seconds // 60) // feed_interval

    if delta.days > 0:
        delta = delta_sec + delta.days * 24 * 60 // feed_interval
    else:
        delta = delta_sec

    out = []

    # split request into chunks if number of data points is greater than MAX_FETCH
    if limit > MAX_FETCH or delta > MAX_FETCH:
        limit = max(limit, delta)
        lnt = 0
        failed_count = 0

        # for lnt in range(0, limit, MAX_FETCH):
        # Walk backwards in time from `end_date`, at most MAX_FETCH points per call
        while end_date > start_date:
            try:
                # NOTE(review): if the loop is still running once `lnt` exceeds
                # `limit`, `delta` becomes negative here (negative request limit,
                # and `end_date` moves forward below) -- confirm this cannot
                # happen for time ranges that are not a multiple of feed_interval
                if (limit - lnt) < MAX_FETCH:
                    delta = limit - lnt
                else:
                    delta = MAX_FETCH

                time.sleep(ONE_SECOND)
                out.extend(cast(List[Any], call_ambient_api(get_data)))
                end_date -= timedelta(minutes=delta * feed_interval)
                lnt += MAX_FETCH
                failed_count = 0
            except Exception:
                # skip SKIP_DATA_FOR_PERIOD minutes when the weather station malfunctions
                # NOTE(review): `start_date` is shifted together with `end_date`,
                # so a failure does not bring the loop closer to its exit
                # condition; repeated failures only end through the
                # `failed_count` check below -- confirm this is intended
                end_date -= timedelta(minutes=SKIP_DATA_FOR_PERIOD)
                start_date -= timedelta(minutes=SKIP_DATA_FOR_PERIOD)
                lnt += SKIP_DATA_FOR_PERIOD // feed_interval
                failed_count += 1

                # stop execution once more than FAILED_COUNT consecutive API calls have failed
                if failed_count > FAILED_COUNT:
                    raise RuntimeError("Weather station not responding.")
    else:
        # Single request: an explicit positive `limit` wins, otherwise use the
        # number of points in the time range (at least MIN_FETCH)
        if limit > 0:
            delta = limit
        else:
            delta = MIN_FETCH if delta == 0 else delta

        out = call_ambient_api(get_data)

    file_path = os.path.join(output_dir, "weather.csv")
    pd.DataFrame(out).to_csv(file_path)

    asset = AssetVibe(reference=file_path, type=mimetypes.types_map[".csv"], id=gen_guid())
    return WeatherVibe(
        gen_hash_id(
            f"AmbientWeather_{device.mac_address}",
            user_input.geometry,
            user_input.time_range,
        ),
        user_input.time_range,
        user_input.geometry,
        [asset],
    )
class CallbackBuilder:
    """Builds the `download_ambient_weather` op callback."""

    def __init__(self, api_key: str, app_key: str, limit: int, feed_interval: int):
        """Store the op parameters and create the scratch directory.

        Args:
            api_key: API key used to access Ambient Weather Station API
            app_key: App key used to access Ambient Weather Station API
            limit: Number of data points to be downloaded from ambient service
            feed_interval: value forwarded unchanged to `get_weather`
        """
        self.temp_dir = TemporaryDirectory()
        self.api_key, self.app_key = api_key, app_key
        self.limit, self.feed_interval = limit, feed_interval

    def __call__(self):
        """Return the callback that downloads weather for the first input item."""

        def get_weather_data(user_input: List[DataVibe]) -> Dict[str, WeatherVibe]:
            # Only the first element of the input list is queried
            query = user_input[0]
            options = {
                "output_dir": self.temp_dir.name,
                "api_key": self.api_key,
                "app_key": self.app_key,
                "limit": self.limit,
                "feed_interval": self.feed_interval,
            }
            return {"weather": get_weather(query, **options)}

        return get_weather_data

    def __del__(self):
        # Drop the scratch directory together with the builder
        self.temp_dir.cleanup()
def build_raster_asset(tile_path: str, tile_bbox: BBox, output_path: str):
    """Build a GeoTIFF raster asset from a tile downloaded from BingMaps.

    Args:
        tile_path: path of the downloaded tile image.
        tile_bbox: bounds of the tile, unpacked into `from_bounds`
            (west, south, east, north).
        output_path: path where the GeoTIFF is written.
    """
    with rasterio.open(tile_path) as src:
        img = src.read()

    # `img` is (bands, height, width)
    band_count, height, width = img.shape
    transform = from_bounds(*tile_bbox, width, height)

    with rasterio.open(
        output_path,
        "w",
        driver="GTiff",
        height=height,
        width=width,
        # Use the tile's real band count: the whole array is written below, so a
        # hard-coded 3 breaks for tiles that are not exactly three bands
        count=band_count,
        dtype=img.dtype,
        crs="EPSG:4326",
        transform=transform,
    ) as dst:
        dst.write(img)
self.collection.download_tile(product.url, tile_path) + except (RuntimeError, ValueError) as e: + raise type(e)( + f"Failed to download tile {product.id} at zoom level {product.zoom_level}. {e}" + ) from e + + build_raster_asset(tile_path, product.bbox, raster_path) + asset = AssetVibe( + reference=raster_path, + type="image/tiff", + id=gen_guid(), + ) + return asset + + def __call__(self): + def download_bing_basemap( + input_product: BingMapsProduct, + ) -> Dict[str, Raster]: + asset = self.download_basemap(input_product) + + basemap = Raster.clone_from( + input_product, + id=hashlib.sha256(f"downloaded_basemap_{input_product.id}".encode()).hexdigest(), + assets=[asset], + bands={"red": 0, "green": 1, "blue": 2}, + ) + + return {"basemap": basemap} + + return download_bing_basemap + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_bing_basemap/download_bing_basemap.yaml b/ops/download_bing_basemap/download_bing_basemap.yaml new file mode 100644 index 00000000..3cc0e25f --- /dev/null +++ b/ops/download_bing_basemap/download_bing_basemap.yaml @@ -0,0 +1,22 @@ +name: download_bing_basemap +inputs: + input_product: BingMapsProduct +output: + basemap: Raster +parameters: + api_key: +entrypoint: + file: download_bing_basemap.py + callback_builder: CallbackBuilder +dependencies: +description: + short_description: + Downloads a basemap tile represented by a BingMapsProduct using BingMapsAPI. + long_description: + The op will download a basemap tile and return it as a raster. + inputs: + input_product: Product with the tile metadata to be downloaded. + output: + basemap: Downloaded basemap as a raster. + parameters: + api_key: Bing Maps API key. Required to run the workflow. 
diff --git a/ops/download_bing_basemap/test_download_bing_basemap.py b/ops/download_bing_basemap/test_download_bing_basemap.py new file mode 100644 index 00000000..865e98b8 --- /dev/null +++ b/ops/download_bing_basemap/test_download_bing_basemap.py @@ -0,0 +1,66 @@ +import os +from datetime import datetime +from unittest.mock import MagicMock, patch + +import numpy as np +from PIL import Image +from shapely.geometry import Polygon, mapping + +from vibe_core.data import Raster +from vibe_core.data.products import BingMapsProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.bing_maps import BingMapsCollection + +FAKE_GEOMETRY = Polygon( + [ + (46.998848, -118.940490), + (46.998848, -118.876148), + (47.013422, -118.876148), + (47.013422, -118.940490), + ] +) +FAKE_TIME_RANGE = (datetime.now(), datetime.now()) + + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_bing_basemap.yaml") + + +def create_blank_jpeg(_: str, out_path: str): + data = np.zeros((100, 100, 3), dtype=np.uint8) + img = Image.fromarray(data) + img.save(out_path) + + +@patch.object( + BingMapsCollection, + "download_tile", + side_effect=create_blank_jpeg, +) +@patch.object( + BingMapsCollection, + "get_download_url_and_subdomains", + return_value=("fake_download_url_{subdomain}_{quadkey}_{api_key}", ["fake_subdomain"]), +) +def test_op(_: MagicMock, __: MagicMock): + input_product = BingMapsProduct( + id="fake_product", + time_range=FAKE_TIME_RANGE, + geometry=mapping(FAKE_GEOMETRY), # type: ignore + assets=[], + url="fake_url", + zoom_level=1, + imagery_set="Aerial", + map_layer="Basemap", + orientation=0.0, + ) + + op_tester = OpTester(CONFIG_PATH) + op_tester.update_parameters({"api_key": "fake_api_key"}) + output_data = op_tester.run(**{"input_product": input_product}) + + # Get op result + output_name = "basemap" + assert output_name in output_data + output_basemap = output_data[output_name] + assert isinstance(output_basemap, Raster) + assert 
def download_cdl_tif(cdl_product: CDLProduct, out_path: str) -> None:
    """Fetch the CDL archive for the product's year and write its .tif, recompressed, to out_path"""
    year = cdl_product.time_range[0].year

    with TemporaryDirectory() as scratch:
        archive_path = os.path.join(scratch, f"cdl_{year}.zip")
        download_file(CDL_DOWNLOAD_URL.format(year), archive_path)

        with ZipFile(archive_path) as archive:
            tif_members = [m for m in archive.filelist if m.filename.endswith(".tif")]
            member = tif_members[0]
            # Dropping the directory part of the member name makes extract()
            # write the file directly into the scratch directory
            # https://stackoverflow.com/questions/4917284/
            member.filename = os.path.basename(member.filename)
            extracted_path = archive.extract(member, path=scratch)
            compress_raster(extracted_path, out_path, **INT_COMPRESSION_KWARGS)
Dict[str, CategoricalRaster]: + """ + This op receives a CDLProduct (probably from list_cdl_products op) and + downloads the zipped CDL map. It decompress the .tif file from it and yields + a CategoricalRaster with references to that asset + """ + + out_id = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{out_id}.tif") + + download_cdl_tif(input_product, filepath) + + new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".tif"], id=out_id) + + vis_dict: Dict[str, Any] = { + "bands": [0], + "colormap": step_cmap_from_colors( + self.cmap, range(self.MIN_CLASS_IDX + 1, self.MAX_CLASS_IDX + 1) + ), + "range": (self.MIN_CLASS_IDX, self.MAX_CLASS_IDX), + } + + raster = CategoricalRaster.clone_from( + input_product, + id=gen_guid(), + assets=[new_asset, json_to_asset(vis_dict, self.tmp_dir.name)], + bands={"categories": 0}, + categories=self.df["Class_Names"].tolist(), + ) + + return {"cdl_raster": raster} + + return cdl_callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_cdl_data/download_cdl.yaml b/ops/download_cdl_data/download_cdl.yaml new file mode 100644 index 00000000..f020cd8d --- /dev/null +++ b/ops/download_cdl_data/download_cdl.yaml @@ -0,0 +1,12 @@ +name: download_cdl +inputs: + input_product: CDLProduct +output: + cdl_raster: CategoricalRaster +parameters: + metadata_path: /opt/terravibes/ops/resources/cdl_metadata/CDL_codes_names_colors.xls +entrypoint: + file: download_cdl.py + callback_builder: CallbackBuilder +description: + short_description: Downloads a CategoricalRaster from a CDLProduct. 
def download_file(url: str, out_path: str) -> None:
    """Stream the response body of a GET request to `url` into the file `out_path`.

    An HTTP error status raises before the output file is opened.
    """
    response = requests.get(url, stream=True)
    with response:
        response.raise_for_status()
        with open(out_path, "wb") as sink:
            # The body is consumed in 8 KiB pieces, never loaded as a whole
            sink.writelines(response.iter_content(chunk_size=8192))
def save_cdl_tif(geom: BaseGeometry, dt: datetime, out_path: str) -> None:
    """Download the CDL raster covering `geom` for the year of `dt` into `out_path`.

    The geometry is split into pieces of at most MAX_AREA; each piece that
    intersects `geom` is requested separately and the parts are merged into a
    single GeoTIFF when there is more than one.

    Args:
        geom: area of interest (callers pass it projected to CDL_CRS).
        dt: date whose year selects the CDL map.
        out_path: destination path of the resulting GeoTIFF.
    """
    import shutil

    # Splitting works on bounding boxes, so drop boxes that miss the geometry
    split_geoms = [g for g in split_geometry(geom, MAX_AREA) if g.intersects(geom)]
    with TemporaryDirectory() as tmp:
        split_paths = [os.path.join(tmp, f"{i}.tif") for i in range(len(split_geoms))]
        for g, p in zip(split_geoms, split_paths):
            tif_url = get_cdl_url(g, dt)
            download_file(tif_url, p)
        if len(split_geoms) > 1:
            # Merge all parts into a single tiff
            merge(split_paths, bounds=geom.bounds, dst_path=out_path)
        else:
            # shutil.move also works when out_path is on another filesystem,
            # where os.rename raises OSError
            shutil.move(split_paths[0], out_path)
taking the year in the middle point of the time range for now + dt = datetime.fromtimestamp(sum(d.timestamp() for d in input_data.time_range) / 2) + out_id = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{out_id}.tif") + save_cdl_tif(proj_geom, dt, filepath) + new_asset = AssetVibe(reference=filepath, type=mimetypes.types_map[".tif"], id=out_id) + + vis_dict: Dict[str, Any] = { + "bands": [0], + "colormap": step_cmap_from_colors( + self.cmap, range(self.MIN_CLASS_IDX + 1, self.MAX_CLASS_IDX + 1) + ), + "range": (self.MIN_CLASS_IDX, self.MAX_CLASS_IDX), + } + + raster = CategoricalRaster.clone_from( + input_data, + id=gen_guid(), + assets=[new_asset, json_to_asset(vis_dict, self.tmp_dir.name)], + bands={"categories": 0}, + categories=self.df["Class_Names"].tolist(), + ) + + return raster + + def cdl_callback_list(input_data: List[DataVibe]) -> Dict[str, List[CategoricalRaster]]: + return {"cdl_rasters": [cdl_callback(input_datum) for input_datum in input_data]} + + return cdl_callback_list + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_cdl_data/download_cdl_data.yaml b/ops/download_cdl_data/download_cdl_data.yaml new file mode 100644 index 00000000..e9baebe8 --- /dev/null +++ b/ops/download_cdl_data/download_cdl_data.yaml @@ -0,0 +1,10 @@ +name: download_cdl_data +inputs: + input_items: List[DataVibe] +output: + cdl_rasters: List[CategoricalRaster] +parameters: + metadata_url: https://www.nass.usda.gov/Research_and_Science/Cropland/docs/CDL_codes_names_colors.xls +entrypoint: + file: download_cdl_data.py + callback_builder: CallbackBuilder diff --git a/ops/download_chirps/download_chirps.py b/ops/download_chirps/download_chirps.py new file mode 100644 index 00000000..30f65c09 --- /dev/null +++ b/ops/download_chirps/download_chirps.py @@ -0,0 +1,44 @@ +import logging +import os +import re +from tempfile import TemporaryDirectory +from typing import Dict, Optional + +from vibe_core.data import AssetVibe, gen_hash_id +from 
class CallbackBuilder:
    """Builds the `download_chirps` op callback."""

    def __init__(self):
        # Downloads land in this scratch directory, which __del__ removes
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        """Return the callback that downloads the COG referenced by a product."""

        def download_product(
            chirps_product: ChirpsProduct,
        ) -> Dict[str, Optional[ChirpsProduct]]:
            # The local file name is the "chirps-...cog" part of the product URL
            match = re.search("chirps-.*cog", chirps_product.url)
            if match is None:
                raise ValueError(f"URL for chirps product has no COG. url: {chirps_product.url}")
            cog_name = match.group()

            local_path = os.path.join(self.tmp_dir.name, cog_name)
            download_file(chirps_product.url, local_path)

            cog_asset = AssetVibe(reference=local_path, type="image/tiff", id=gen_guid())
            product_id = gen_hash_id(cog_name, chirps_product.geometry, chirps_product.time_range)
            return {
                "downloaded_product": ChirpsProduct.clone_from(
                    chirps_product,
                    id=product_id,
                    assets=[cog_asset],
                )
            }

        return download_product

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the `download_climatology_lab` op callback."""

    def __init__(self):
        # Downloads land in this scratch directory, which __del__ removes
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        """Return the callback that downloads the file behind a product URL."""

        def download_climatology_lab(
            input_product: ClimatologyLabProduct,
        ) -> Dict[str, ClimatologyLabProduct]:
            # The asset id doubles as the local file name
            asset_id = gen_guid()
            local_path = os.path.join(self.tmp_dir.name, f"{asset_id}.nc")
            download_file(input_product.url, local_path)

            netcdf_asset = AssetVibe(
                reference=local_path, type=mimetypes.types_map[".nc"], id=asset_id
            )
            downloaded_id = gen_hash_id(
                f"{input_product.id}_downloaded",
                input_product.geometry,
                input_product.time_range,
            )
            return {
                "downloaded_product": ClimatologyLabProduct.clone_from(
                    input_product,
                    id=downloaded_id,
                    assets=[netcdf_asset],
                )
            }

        return download_climatology_lab

    def __del__(self):
        self.tmp_dir.cleanup()
+ inputs: + input_product: Input Climatology Lab product. + output: + downloaded_product: Downloaded product with desired variable. diff --git a/ops/download_climatology_lab/test_download_climatology_lab.py b/ops/download_climatology_lab/test_download_climatology_lab.py new file mode 100644 index 00000000..705965e2 --- /dev/null +++ b/ops/download_climatology_lab/test_download_climatology_lab.py @@ -0,0 +1,40 @@ +import os +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +from shapely.geometry import Point, mapping + +from vibe_core.data import ClimatologyLabProduct +from vibe_dev.testing.op_tester import OpTester + +FAKE_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) +FAKE_TIME_RANGE = ( + datetime(year=2019, month=1, day=1, tzinfo=timezone.utc), + datetime(year=2019, month=12, day=31, tzinfo=timezone.utc), +) + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "download_climatology_lab.yaml" +) + + +@patch("vibe_core.file_downloader.download_file") +def test_op(_: MagicMock): + input_product = ClimatologyLabProduct( + id="fake_product", + time_range=FAKE_TIME_RANGE, + geometry=mapping(FAKE_GEOMETRY), # type: ignore + assets=[], + url="fake_href", + variable="fake_variable", + ) + + op_tester = OpTester(CONFIG_PATH) + output_data = op_tester.run(**{"input_product": input_product}) + + # Get op result + output_name = "downloaded_product" + assert output_name in output_data + output_raster = output_data[output_name] + assert isinstance(output_raster, ClimatologyLabProduct) + assert len(output_raster.assets) == 1 diff --git a/ops/download_dem/download_dem.py b/ops/download_dem/download_dem.py new file mode 100644 index 00000000..64905300 --- /dev/null +++ b/ops/download_dem/download_dem.py @@ -0,0 +1,68 @@ +import mimetypes +import os +from tempfile import TemporaryDirectory +from typing import Any, Dict, List, cast + +import planetary_computer as pc + +from vibe_core.data import 
AssetVibe, DemProduct, DemRaster, gen_guid, gen_hash_id +from vibe_lib.planetary_computer import validate_dem_provider +from vibe_lib.raster import RGBA, interpolated_cmap_from_colors, json_to_asset + +ELEVATION_CMAP_INTERVALS: List[float] = [0.0, 4000.0] + +ELEVATION_CMAP_COLORS: List[RGBA] = [ + RGBA(0, 0, 0, 255), + RGBA(255, 255, 255, 255), +] + + +class CallbackBuilder: + def __init__(self, api_key: str): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def op(input_product: DemProduct) -> Dict[str, DemRaster]: + pc.set_subscription_key(self.api_key) + collection = validate_dem_provider( + input_product.provider.upper(), input_product.resolution + ) + item = collection.query_by_id(input_product.tile_id) + assets = collection.download_item( + item, os.path.join(self.tmp_dir.name, input_product.id) + ) + assets = [ + AssetVibe(reference=a, type=cast(str, mimetypes.guess_type(a)[0]), id=gen_guid()) + for a in assets + ] + vis_dict: Dict[str, Any] = { + "bands": [0], + "colormap": interpolated_cmap_from_colors( + ELEVATION_CMAP_COLORS, ELEVATION_CMAP_INTERVALS + ), + "range": (0, 4000), + } + assets.append(json_to_asset(vis_dict, self.tmp_dir.name)) + + downloaded_product = DemRaster( + id=gen_hash_id( + f"{input_product.id}_download_dem_product", + input_product.geometry, + input_product.time_range, + ), + time_range=input_product.time_range, + geometry=input_product.geometry, + assets=assets, + bands={"elevation": 0}, + tile_id=input_product.tile_id, + resolution=input_product.resolution, + provider=input_product.provider, + ) + + return {"downloaded_product": downloaded_product} + + return op + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_dem/download_dem.yaml b/ops/download_dem/download_dem.yaml new file mode 100644 index 00000000..ee9378c8 --- /dev/null +++ b/ops/download_dem/download_dem.yaml @@ -0,0 +1,12 @@ +name: download_dem +inputs: + input_product: DemProduct +output: + 
downloaded_product: DemRaster +parameters: + api_key: "" +entrypoint: + file: download_dem.py + callback_builder: CallbackBuilder +description: + short_description: Downloads digital elevation map raster given a DemProduct. \ No newline at end of file diff --git a/ops/download_dem/test_download_dem.py b/ops/download_dem/test_download_dem.py new file mode 100644 index 00000000..64893a18 --- /dev/null +++ b/ops/download_dem/test_download_dem.py @@ -0,0 +1,51 @@ +import os +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import DemProduct +from vibe_core.data.rasters import DemRaster +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import USGS3DEPCollection + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_dem.yaml") + + +@patch( + "vibe_lib.planetary_computer.get_available_collections", + return_value=[USGS3DEPCollection.collection], +) +@patch.object(USGS3DEPCollection, "query_by_id") +@patch( + "vibe_lib.planetary_computer.USGS3DEPCollection.download_item", return_value=["/tmp/test.tif"] +) +def test_op(_: MagicMock, __: MagicMock, ___: MagicMock): + latitude = 44.0005556 + longitude = -97.0005556 + buffer = 0.1 + bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] + polygon: Polygon = box(*bbox, ccw=True) + start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) + + output = DemProduct( + id=str("n44w098-13"), + time_range=( + start_date, + end_date, + ), + geometry=mapping(polygon), + assets=[], + tile_id=str("n44w098-13"), + resolution=10, + provider=str("USGS3DEP"), + ) + + output_data = OpTester(CONFIG_PATH).run(input_product=output) + + # Get op result + output_name = "downloaded_product" + assert output_name in output_data + output_product = 
output_data[output_name] + assert isinstance(output_product, DemRaster) diff --git a/ops/download_era5/download_era5.py b/ops/download_era5/download_era5.py new file mode 100644 index 00000000..7460e11c --- /dev/null +++ b/ops/download_era5/download_era5.py @@ -0,0 +1,72 @@ +import logging +import mimetypes +import os +from tempfile import TemporaryDirectory +from typing import Dict, Optional, cast + +import cdsapi +import fsspec +import planetary_computer as pc +import xarray as xr + +from vibe_core.data import AssetVibe, Era5Product, gen_guid, gen_hash_id +from vibe_lib.planetary_computer import Era5Collection + +LOGGER = logging.getLogger(__name__) + + +class CallbackBuilder: + def __init__(self, api_key: str): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def download_product( + era5_product: Era5Product, + ) -> Dict[str, Optional[Era5Product]]: + if era5_product.item_id != "": + pc.set_subscription_key(self.api_key) + collection = Era5Collection() + item = collection.query_by_id(era5_product.item_id) + + # Only downloading the asset corresponding to the requested variable. + # In addition, the requested asset is a zarr, which is a directory structure, + # so it is not possible to use download_asset. 
+ signed_item = pc.sign(item) + asset = signed_item.assets[era5_product.var] + ds = xr.open_dataset(asset.href, **asset.extra_fields["xarray:open_kwargs"]) + else: + if self.api_key == "": + raise ValueError( + "api_key not supplied for CDS (registration " + "in https://cds.climate.copernicus.eu/user/register)" + ) + if len(era5_product.cds_request) != 1: + raise ValueError(f"Invalid number of CDS requests {era5_product.cds_request}") + dataset, request = next((k, v) for k, v in era5_product.cds_request.items()) + c = cdsapi.Client(url="https://cds.climate.copernicus.eu/api/v2", key=self.api_key) + r = c.retrieve(dataset, request) + if r is None: + raise ValueError(f"CDS request {era5_product.cds_request} returned None") + with fsspec.open(r.location) as f: + ds = xr.open_dataset(f, engine="scipy") # type: ignore + + path = os.path.join(self.tmp_dir.name, f"{era5_product.id}.nc") + ds.to_netcdf(path) + vibe_asset = AssetVibe( + reference=path, type=cast(str, mimetypes.guess_type(path)[0]), id=gen_guid() + ) + downloaded_product = Era5Product.clone_from( + era5_product, + id=gen_hash_id( + f"{era5_product.id}_downloaded", era5_product.geometry, era5_product.time_range + ), + assets=[vibe_asset], + ) + + return {"downloaded_product": downloaded_product} + + return download_product + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_era5/download_era5.yaml b/ops/download_era5/download_era5.yaml new file mode 100644 index 00000000..ef72a78a --- /dev/null +++ b/ops/download_era5/download_era5.yaml @@ -0,0 +1,12 @@ +name: download_era5 +inputs: + era5_product: Era5Product +output: + downloaded_product: Era5Product +parameters: + api_key: "" +entrypoint: + file: download_era5.py + callback_builder: CallbackBuilder +description: + short_description: Downloads requested property from ERA5 products. 
\ No newline at end of file diff --git a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py new file mode 100644 index 00000000..230b0fc7 --- /dev/null +++ b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py @@ -0,0 +1,51 @@ +import mimetypes +import os +from tempfile import TemporaryDirectory +from typing import Dict, cast + +import planetary_computer as pc + +from vibe_core.data import AssetVibe, CategoricalRaster, gen_guid, gen_hash_id +from vibe_core.data.products import EsriLandUseLandCoverProduct +from vibe_lib.planetary_computer import EsriLandUseLandCoverCollection +from vibe_lib.raster import json_to_asset + + +class CallbackBuilder: + def __init__(self, api_key: str): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def op(input_product: EsriLandUseLandCoverProduct) -> Dict[str, CategoricalRaster]: + pc.set_subscription_key(self.api_key) + collection = EsriLandUseLandCoverCollection() + item = collection.query_by_id(input_product.id) + assets = collection.download_item( + item, os.path.join(self.tmp_dir.name, input_product.id) + ) + vibe_assets = [ + AssetVibe(reference=a, type=cast(str, mimetypes.guess_type(a)[0]), id=gen_guid()) + for a in assets + ] + vis_asset = json_to_asset({"bands": list(range(1))}, self.tmp_dir.name) + vibe_assets.append(vis_asset) + downloaded_product = CategoricalRaster( + id=gen_hash_id( + f"{input_product.id}_download_esri_landuse_landcover_product", + input_product.geometry, + input_product.time_range, + ), + time_range=input_product.time_range, + geometry=input_product.geometry, + assets=vibe_assets, + bands={"data": 0}, + categories=EsriLandUseLandCoverCollection.categories, + ) + + return {"downloaded_product": downloaded_product} + + return op + + def __del__(self): + self.tmp_dir.cleanup() diff --git 
a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml new file mode 100644 index 00000000..d7439c1f --- /dev/null +++ b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.yaml @@ -0,0 +1,12 @@ +name: download_esri_landuse_landcover +inputs: + input_product: EsriLandUseLandCoverProduct +output: + downloaded_product: CategoricalRaster +parameters: + api_key: "" +entrypoint: + file: download_esri_landuse_landcover.py + callback_builder: CallbackBuilder +description: + short_description: Downloads ESRI 10m Land Use/Land Cover (9-class) raster from EsriLandUseLandCoverProduct. \ No newline at end of file diff --git a/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py new file mode 100644 index 00000000..950cad34 --- /dev/null +++ b/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py @@ -0,0 +1,52 @@ +import os +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import CategoricalRaster +from vibe_core.data.core_types import DataVibe +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import EsriLandUseLandCoverCollection + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "download_esri_landuse_landcover.yaml" +) + + +@patch( + "vibe_lib.planetary_computer.get_available_collections", + return_value=[EsriLandUseLandCoverCollection.collection], +) +@patch.object(EsriLandUseLandCoverCollection, "query_by_id") +@patch.object( + EsriLandUseLandCoverCollection, + "download_item", + return_value=["/tmp/test_esri_landuse_landcover.tif"], +) +def test_op(_: MagicMock, __: MagicMock, ___: MagicMock): + latitude = 42.21422 + longitude = -93.22890 + buffer = 0.001 + bbox = [longitude 
- buffer, latitude - buffer, longitude + buffer, latitude + buffer] + polygon: Polygon = box(*bbox, ccw=True) + start_date = datetime(year=2017, month=1, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2018, month=1, day=1, tzinfo=timezone.utc) + + input: DataVibe = DataVibe( + id=str("47P-2017"), + time_range=( + start_date, + end_date, + ), + geometry=mapping(polygon), # type: ignore + assets=[], + ) + + output_data = OpTester(CONFIG_PATH).run(**{"input_product": input}) + + # Get op result + output_name = "downloaded_product" + assert output_name in output_data + output_product = output_data[output_name] + assert isinstance(output_product, CategoricalRaster) diff --git a/ops/download_from_ref/download_from_ref.py b/ops/download_from_ref/download_from_ref.py new file mode 100644 index 00000000..b9af2ac9 --- /dev/null +++ b/ops/download_from_ref/download_from_ref.py @@ -0,0 +1,86 @@ +import hashlib +import mimetypes +import os +import pathlib +import shutil +from dataclasses import fields +from tempfile import TemporaryDirectory +from typing import Any, Dict, Type, cast, get_origin + +from vibe_core.data import ( + AssetVibe, + DataVibe, + ExternalReference, + data_registry, + gen_hash_id, +) +from vibe_core.file_downloader import download_file +from vibe_core.uri import is_local, local_uri_to_path, uri_to_filename + +CHUNK_SIZE_BYTES = 1024 * 1024 + + +def hash_file(filepath: str, chunk_size: int = CHUNK_SIZE_BYTES) -> str: + h = hashlib.sha256() + with open(filepath, "rb") as f: + while True: + b = f.read(chunk_size) + if not b: + break + h.update(b) + return h.hexdigest() + + +def get_empty_type(t: Any): + o = get_origin(t) + if o is not None: + return o() + return t() + + +def get_empty_fields(data_type: Type[DataVibe]) -> Dict[str, Any]: + base_fields = [f for f in fields(DataVibe) if f.init] + init_fields = [f for f in fields(data_type) if f.init and f not in base_fields] + return {f.name: get_empty_type(f.type) for f in init_fields} + + +def 
add_mime_type(extension: str): + if extension == ".geojson": + mimetypes.add_type("application/json", ".geojson") + + +class CallbackBuilder: + def __init__(self, out_type: str): + self.tmp_dir = TemporaryDirectory() + self.out_type = cast(Type[DataVibe], data_registry.retrieve(out_type)) + + def __call__(self): + def callback(input_ref: ExternalReference) -> Dict[str, DataVibe]: + # Download the file + out_path = os.path.join(self.tmp_dir.name, uri_to_filename(input_ref.url)) + if is_local(input_ref.url): + shutil.copy(local_uri_to_path(input_ref.url), out_path) + else: + download_file(input_ref.url, out_path) + + file_extension = pathlib.Path(out_path).suffix + if file_extension not in mimetypes.types_map.keys(): + add_mime_type(file_extension) + + # Create asset and Raster + asset_id = hash_file(out_path) + asset = AssetVibe( + reference=out_path, type=mimetypes.guess_type(out_path)[0], id=asset_id + ) + out = self.out_type.clone_from( + input_ref, + id=gen_hash_id(asset_id, input_ref.geometry, input_ref.time_range), + assets=[asset], + **get_empty_fields(self.out_type), + ) + return {"downloaded": out} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_from_ref/download_geometry_from_ref.yaml b/ops/download_from_ref/download_geometry_from_ref.yaml new file mode 100644 index 00000000..019e7c1a --- /dev/null +++ b/ops/download_from_ref/download_geometry_from_ref.yaml @@ -0,0 +1,12 @@ +name: download_geometry_from_ref +inputs: + input_ref: ExternalReference +output: + downloaded: GeometryCollection +parameters: + out_type: GeometryCollection +entrypoint: + file: download_from_ref.py + callback_builder: CallbackBuilder +description: + short_description: Downloads geometries provided in the reference and generates a GeometryCollection. 
\ No newline at end of file diff --git a/ops/download_from_ref/download_raster_from_ref.yaml b/ops/download_from_ref/download_raster_from_ref.yaml new file mode 100644 index 00000000..1c08000b --- /dev/null +++ b/ops/download_from_ref/download_raster_from_ref.yaml @@ -0,0 +1,12 @@ +name: download_raster_from_ref +inputs: + input_ref: ExternalReference +output: + downloaded: Raster +parameters: + out_type: Raster +entrypoint: + file: download_from_ref.py + callback_builder: CallbackBuilder +description: + short_description: Downloads the raster from the input reference's url. \ No newline at end of file diff --git a/ops/download_from_smb/download_rasters_from_smb.py b/ops/download_from_smb/download_rasters_from_smb.py new file mode 100644 index 00000000..4a0fe263 --- /dev/null +++ b/ops/download_from_smb/download_rasters_from_smb.py @@ -0,0 +1,144 @@ +import mimetypes +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Dict, List + +from smb.SMBConnection import SMBConnection + +from vibe_core.data import AssetVibe, DataVibe, Raster, gen_guid, gen_hash_id + + +def download_all_files( + server_name: str, + server_ip: str, + server_port: int, + username: str, + password: str, + share_name: str, + directory_path: str, + output_dir: Path, +) -> List[AssetVibe]: + """Download all files under directory_path on the SMB share and return a list of AssetVibes.""" + # Establish a connection with the server + conn = SMBConnection( + username, + password, + "FarmVibes_SMB_Downloader", + server_name, + use_ntlm_v2=True, + is_direct_tcp=True, + ) + conn.connect(server_ip, server_port) + + # Collect all files in the directory as assets + asset_list = [] + attributes = conn.getAttributes(share_name, directory_path) + + # Convert path to unix style + directory_path = directory_path.replace("\\", "/") + path = Path(directory_path) + if attributes.isDirectory: + crawl_directory(conn, share_name, path, asset_list, output_dir) + else: + 
download_asset(conn, share_name, path, asset_list, output_dir) + return asset_list + + +def download_asset( + conn: SMBConnection, + share_name: str, + filepath: Path, + asset_list: List[AssetVibe], + output_dir: Path, +): + # Compute the output path + if filepath.is_absolute(): + filepath = filepath.relative_to("/") + output_path = output_dir.joinpath(filepath) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Create an Asset type from the file + with open(output_path, "wb") as asset_file: + conn.retrieveFile(share_name, str(filepath), asset_file) + asset = AssetVibe( + reference=asset_file.name, + type=mimetypes.guess_type(asset_file.name)[0], + id=gen_guid(), + ) + asset_list.append(asset) + + +def crawl_directory( + conn: SMBConnection, + share_name: str, + dir_path: Path, + asset_list: List[AssetVibe], + output_dir: Path, +): + """Recursively search through the file system starting at directory + and download all files.""" + files = conn.listPath(share_name, str(dir_path)) + for file in files: + if file.filename not in [".", ".."]: + filepath = dir_path.joinpath(file.filename) + if file.isDirectory: + # Open subfolder + crawl_directory(conn, share_name, filepath, asset_list, output_dir) + else: + # Download the file if it is an image + mimetype = mimetypes.guess_type(str(filepath))[0] + if mimetype and mimetype.startswith("image"): + download_asset(conn, share_name, filepath, asset_list, output_dir) + + +class CallbackBuilder: + def __init__( + self, + server_name: str, + server_ip: str, + server_port: int, + username: str, + password: str, + share_name: str, + directory_path: str, + bands: List[str], + ): + self.server_name = server_name + self.server_ip = server_ip + self.server_port = server_port + self.username = username + self.password = password + self.share_name = share_name + self.directory_path = directory_path + self.bands = bands + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def download(user_input: DataVibe) -> 
Dict[str, List[Raster]]: + raster_assets = download_all_files( + self.server_name, + self.server_ip, + self.server_port, + self.username, + self.password, + self.share_name, + self.directory_path, + Path(self.tmp_dir.name), + ) + bands = {name: index for index, name in enumerate(self.bands)} + return { + "rasters": [ + Raster.clone_from( + user_input, + id=gen_hash_id(asset.id, user_input.geometry, user_input.time_range), + assets=[asset], + bands=bands, + ) + for asset in raster_assets + ] + } + + return download + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_from_smb/download_rasters_from_smb.yaml b/ops/download_from_smb/download_rasters_from_smb.yaml new file mode 100644 index 00000000..6062db1d --- /dev/null +++ b/ops/download_from_smb/download_rasters_from_smb.yaml @@ -0,0 +1,33 @@ +name: download_rasters_from_smb +inputs: + user_input: DataVibe +output: + rasters: List[Raster] +parameters: + server_name: + server_ip: "@SECRET(eywa-secrets, smb-server-ip)" + server_port: 445 + username: "@SECRET(eywa-secrets, smb-username)" + password: "@SECRET(eywa-secrets, smb-password)" + share_name: + directory_path: "/" + bands: ["red", "green", "blue"] +entrypoint: + file: download_rasters_from_smb.py + callback_builder: CallbackBuilder +dependecies: + parameters: + - server_name + - share_name +description: + short_description: + Downloads rasters from an SMB share. 
+ parameters: + server_name: The name of the SMB server + server_ip: The IP address of the SMB server + server_port: The port to connect to on the SMB server + username: Username used to connect to server + password: Password to access server + share_name: Name of file share + directory_path: Path to directory containing rasters + bands: Ordered list of bands within the rasters diff --git a/ops/download_gedi_product/download_gedi_product.py b/ops/download_gedi_product/download_gedi_product.py new file mode 100644 index 00000000..1359a652 --- /dev/null +++ b/ops/download_gedi_product/download_gedi_product.py @@ -0,0 +1,39 @@ +import logging +import os +from tempfile import TemporaryDirectory +from typing import Dict + +from vibe_core.data import AssetVibe, GEDIProduct, gen_guid +from vibe_core.file_downloader import download_file +from vibe_lib.earthdata import EarthDataAPI + +LOGGER = logging.getLogger(__name__) + + +class CallbackBuilder: + def __init__(self, token: str): + self.token = token + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def callback(gedi_product: GEDIProduct) -> Dict[str, GEDIProduct]: + api = EarthDataAPI(gedi_product.processing_level) + LOGGER.info(f"Querying EarthData API for product {gedi_product.product_name}") + items = api.query(id=gedi_product.product_name) + if len(items) != 1: + raise RuntimeError( + f"Query for GEDI product {gedi_product.product_name} " + "returned {len(items)} items, expected one item" + ) + url = items[0]["links"][0]["href"] + asset_guid = gen_guid() + out_path = os.path.join(self.tmp_dir.name, f"{asset_guid}") + h5_path = f"{out_path}.h5" + headers = {"Authorization": f"Bearer {self.token}", "Content-Type": "application/json"} + LOGGER.info(f"Downloading data from {url}") + download_file(url, h5_path, headers=headers) + asset = AssetVibe(reference=h5_path, type="application/x-hdf5", id=asset_guid) + dl_product = GEDIProduct.clone_from(gedi_product, id=gen_guid(), assets=[asset]) + return 
{"downloaded_product": dl_product} + + return callback diff --git a/ops/download_gedi_product/download_gedi_product.yaml b/ops/download_gedi_product/download_gedi_product.yaml new file mode 100644 index 00000000..ffe893f8 --- /dev/null +++ b/ops/download_gedi_product/download_gedi_product.yaml @@ -0,0 +1,12 @@ +name: download_gedi_product +inputs: + gedi_product: GEDIProduct +output: + downloaded_product: GEDIProduct +parameters: + token: "@SECRET(eywa-secrets, earthdata-token)" +entrypoint: + file: download_gedi_product.py + callback_builder: CallbackBuilder +description: + short_description: Downloads GEDI products. \ No newline at end of file diff --git a/ops/download_gedi_product/test_download_gedi_product.py b/ops/download_gedi_product/test_download_gedi_product.py new file mode 100644 index 00000000..69c070d9 --- /dev/null +++ b/ops/download_gedi_product/test_download_gedi_product.py @@ -0,0 +1,73 @@ +import os +from datetime import datetime +from typing import Any, cast +from unittest.mock import Mock, patch + +import h5py +import numpy as np +from shapely import geometry as shpg + +from vibe_core import file_downloader +from vibe_core.data import GEDIProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.earthdata import EarthDataAPI + +HERE = os.path.dirname(os.path.abspath(__file__)) +CONFIG_PATH = os.path.join(HERE, "download_gedi_product.yaml") + +NUM_POINTS = 10 +BEAMS = [ + "BEAM0000", + "BEAM0001", + "BEAM0010", + "BEAM0011", + "BEAM0101", + "BEAM0110", + "BEAM1000", + "BEAM1011", +] +L2B = "GEDI02_B.002" + + +def fake_download(_: str, h5_path: str, **kwargs: Any): + beam_value = 0 + with h5py.File(h5_path, "w") as f: + for b in BEAMS: + beam_value = int(b.replace("BEAM", ""), 2) + f.create_dataset(f"{b}/geolocation/lon_lowestmode", data=np.arange(NUM_POINTS)) + f.create_dataset( + f"{b}/geolocation/lat_lowestmode", data=np.arange(NUM_POINTS) + NUM_POINTS + ) + f.create_dataset(f"{b}/beam", data=beam_value * np.ones(NUM_POINTS)) + 
f.create_dataset(f"{b}/rh100", data=np.linspace(0, 1, NUM_POINTS) + beam_value) + + +@patch.object(file_downloader, "download_file") +@patch.object(EarthDataAPI, "query") +def test_op(query: Mock, download: Mock): + query.return_value = [{"links": [{"href": "mock_link"}]}] + download.side_effect = fake_download + now = datetime.now() + geom = shpg.box(0, 0, 1, 1) + x = GEDIProduct( + id="1", + time_range=(now, now), + geometry=shpg.mapping(geom), + assets=[], + product_name="fake_product", + start_orbit=0, + stop_orbit=0, + processing_level="whatever", + ) + op_tester = OpTester(CONFIG_PATH) + test_token = "test-token" + op_tester.update_parameters({"token": test_token}) + out = op_tester.run(gedi_product=x) + query.assert_called_once_with(id=x.product_name) + download.assert_called_once() + # Make sure we used the token + assert download.call_args.kwargs["headers"]["Authorization"] == f"Bearer {test_token}" + assert "downloaded_product" in out + dl_product = cast(GEDIProduct, out["downloaded_product"]) + assert dl_product.geometry == x.geometry + assert dl_product.time_range == x.time_range diff --git a/ops/download_glad_data/download_glad.py b/ops/download_glad_data/download_glad.py new file mode 100644 index 00000000..3cd2f2b0 --- /dev/null +++ b/ops/download_glad_data/download_glad.py @@ -0,0 +1,37 @@ +import mimetypes +import os +from tempfile import TemporaryDirectory +from typing import Dict + +from vibe_core.data import AssetVibe, CategoricalRaster, gen_hash_id +from vibe_core.data.core_types import gen_guid +from vibe_core.data.products import GLADProduct +from vibe_core.file_downloader import download_file + + +class CallbackBuilder: + def __init__(self): + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def download_product(glad_product: GLADProduct) -> Dict[str, CategoricalRaster]: + fname = f"{glad_product.tile_name}_{glad_product.time_range[0].year}.tif" + fpath = os.path.join(self.tmp_dir.name, fname) + download_file(glad_product.url, 
fpath) + + asset = AssetVibe(reference=fpath, type=mimetypes.types_map[".tif"], id=gen_guid()) + + downloaded_product = CategoricalRaster.clone_from( + glad_product, + id=gen_hash_id(fname, glad_product.geometry, glad_product.time_range), + assets=[asset], + bands={"forest_extent": 0}, + categories=["Non-Forest", "Forest"], + ) + + return {"downloaded_product": downloaded_product} + + return download_product + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_glad_data/download_glad.yaml b/ops/download_glad_data/download_glad.yaml new file mode 100644 index 00000000..502c90bc --- /dev/null +++ b/ops/download_glad_data/download_glad.yaml @@ -0,0 +1,11 @@ +name: download_glad +inputs: + glad_product: GLADProduct +output: + downloaded_product: Raster +parameters: +entrypoint: + file: download_glad.py + callback_builder: CallbackBuilder +description: + short_description: Downloads a GLADProduct \ No newline at end of file diff --git a/ops/download_glad_data/test_download_glad_product.py b/ops/download_glad_data/test_download_glad_product.py new file mode 100644 index 00000000..34c1dfc8 --- /dev/null +++ b/ops/download_glad_data/test_download_glad_product.py @@ -0,0 +1,38 @@ +import os +from datetime import datetime +from typing import cast +from unittest.mock import Mock, patch + +import pytest +from shapely import geometry as shpg + +from vibe_core import file_downloader +from vibe_core.data import CategoricalRaster, GLADProduct +from vibe_dev.testing.op_tester import OpTester + + +@pytest.fixture +def glad_product(): + return GLADProduct( + id="test_id", + geometry=shpg.mapping(shpg.box(-115.0, 45.0, -105.0, 55.0)), + time_range=(datetime(2020, 1, 1), datetime(2020, 1, 2)), + url="https://test.com/test.tif", + assets=[], + ) + + +@patch.object(file_downloader, "download_file") +def test_download_glad_product(download: Mock, glad_product: GLADProduct): + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_glad.yaml") 
+ + op_tester = OpTester(config_path) + out = op_tester.run(glad_product=glad_product) + assert out + assert "downloaded_product" in out + downloaded_product: CategoricalRaster = cast(CategoricalRaster, out["downloaded_product"]) + assert len(downloaded_product.assets) > 0 + asset = downloaded_product.assets[0] + assert asset.path_or_url.endswith( + f"{glad_product.tile_name}_{glad_product.time_range[0].year}.tif" + ) diff --git a/ops/download_gnatsgo/download_gnatsgo.yaml b/ops/download_gnatsgo/download_gnatsgo.yaml new file mode 100644 index 00000000..3c8b95a0 --- /dev/null +++ b/ops/download_gnatsgo/download_gnatsgo.yaml @@ -0,0 +1,60 @@ +name: download_gnatsgo +inputs: + gnatsgo_product: GNATSGOProduct +output: + downloaded_raster: GNATSGORaster +parameters: + api_key: "" + variable: +dependencies: + parameters: + - variable +entrypoint: + file: download_gnatsgo_raster.py + callback_builder: CallbackBuilder +description: + short_description: Downloads the raster asset for 'variable' given a GNATSGO product. + parameters: + api_key: Optional Planetary Computer API key. + variable: >- + Options are: + aws{DEPTH} - Available water storage estimate (AWS) for the DEPTH zone. + soc{DEPTH} - Soil organic carbon stock estimate (SOC) for the DEPTH zone. + tk{DEPTH}a - Thickness of soil components used in the DEPTH zone for the AWS calculation. + tk{DEPTH}s - Thickness of soil components used in the DEPTH zone for the SOC calculation. + mukey - Map unit key, a unique identifier of a record for matching with gNATSGO tables. + droughty - Drought vulnerability estimate. + nccpi3all - National Commodity Crop Productivity Index that has the highest value among Corn + and Soybeans, Small Grains, or Cotton for major earthy components. + nccpi3corn - National Commodity Crop Productivity Index for Corn for major earthy + components. + nccpi3cot - National Commodity Crop Productivity Index for Cotton for major earthy + components. 
+ nccpi3sg - National Commodity Crop Productivity Index for Small Grains for major earthy + components. + nccpi3soy - National Commodity Crop Productivity Index for Soy for major earthy components. + pctearthmc - National Commodity Crop Productivity Index map unit percent earthy is the map + unit summed comppct_r for major earthy components. + pwsl1pomu - Potential Wetland Soil Landscapes (PWSL). + rootznaws - Root zone (commodity crop) available water storage estimate (RZAWS). + rootznemc - Root zone depth is the depth within the soil profile that commodity crop (cc) + roots can effectively extract water and nutrients for growth. + musumcpct - Sum of the comppct_r (SSURGO component table) values for all listed components + in the map unit. + musumcpcta - Sum of the comppct_r (SSURGO component table) values used in the available + water storage calculation for the map unit. + musumcpcts - Sum of the comppct_r (SSURGO component table) values used in the soil organic + carbon calculation for the map unit. gNATSGO has properties available for multiple soil + depths. 
class CallbackBuilder:
    """Validate the requested gNATSGO variable and build the raster download callback."""

    def __init__(self, api_key: str, variable: str):
        """Store the op parameters.

        Raises:
            ValueError: if `variable` is not one of `GNATSGOCollection.asset_keys`.
        """
        # Scratch directory that receives the downloaded rasters; removed in __del__.
        self.tmp_dir = TemporaryDirectory()
        self.api_key = api_key

        valid_variables = GNATSGOCollection.asset_keys
        if variable not in valid_variables:
            options = ", ".join(valid_variables)
            raise ValueError(
                f"Requested variable '{variable}' not valid. Valid values are {options}"
            )
        self.variable = variable

    def __call__(self):
        def download_gnatsgo_raster(
            gnatsgo_product: GNATSGOProduct,
        ) -> Dict[str, Optional[GNATSGORaster]]:
            # The subscription key is (re)applied on every invocation, before downloading.
            pc.set_subscription_key(self.api_key)

            variable = self.variable
            raster_asset = download_asset(gnatsgo_product, variable, self.tmp_dir.name)
            raster_id = gen_hash_id(
                f"{gnatsgo_product.id}_{variable}_downloaded_gnatsgo_product",
                gnatsgo_product.geometry,
                gnatsgo_product.time_range,
            )

            return {
                "downloaded_raster": GNATSGORaster.clone_from(
                    gnatsgo_product,
                    id=raster_id,
                    assets=[raster_asset],
                    bands={variable: 0},
                    variable=variable,
                )
            }

        return download_gnatsgo_raster

    def __del__(self):
        # Drop the scratch directory together with the builder.
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Create the op callback that downloads one Hansen layer tile as a single-band raster."""

    def __init__(self):
        # Downloaded tiles are written here; the directory is removed in __del__.
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def download_product(hansen_product: HansenProduct) -> Dict[str, Raster]:
            layer = hansen_product.layer_name
            tile_file = (
                f"hansen_{layer}_{hansen_product.tile_name}_"
                f"{hansen_product.last_year}.tif"
            )
            local_path = os.path.join(self.tmp_dir.name, tile_file)
            download_file(hansen_product.asset_url, local_path)

            tile_asset = AssetVibe(
                reference=local_path, type=mimetypes.types_map[".tif"], id=gen_guid()
            )
            raster_id = gen_hash_id(
                f"{hansen_product.id}_downloaded_hansen_product",
                hansen_product.geometry,
                hansen_product.time_range,
            )

            # The only band is named after the downloaded layer.
            return {
                "raster": Raster.clone_from(
                    hansen_product,
                    id=raster_id,
                    assets=[tile_asset],
                    bands={layer: 0},
                )
            }

        return download_product

    def __del__(self):
        self.tmp_dir.cleanup()
@patch.object(file_downloader, "download_file")
def test_download_hansen_product(download: Mock, hansen_products: List[HansenProduct]):
    """Every Hansen product must come back as a one-asset raster with unchanged metadata."""
    yaml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_hansen.yaml")
    tester = OpTester(yaml_path)

    for product in hansen_products:
        result = tester.run(hansen_product=product)
        assert result

        downloaded = cast(Raster, result["raster"])
        assert downloaded
        assert len(downloaded.assets) == 1
        # The single band is keyed by the product's layer name.
        assert downloaded.bands == {product.layer_name: 0}

        # Time range and geometry are carried over from the input product.
        assert downloaded.time_range == product.time_range
        assert downloaded.geometry == product.geometry
class CallbackBuilder:
    """Create the op callback that downloads a Herbie GRIB file and wraps it as a `Grib`."""

    def __init__(self):
        # NOTE(review): this directory is created and cleaned up, but it is not passed to
        # Herbie below, so the download location is whatever Herbie defaults to - confirm.
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def download_herbie(
            herbie_product: HerbieProduct,
        ) -> Dict[str, Optional[Grib]]:
            init_time = herbie_product.time_range[0]
            lead_hours = herbie_product.lead_time_hours

            # Herbie is handed a timezone-naive initialization time.
            herbie = Herbie(
                init_time.replace(tzinfo=None),
                fxx=lead_hours,
                model=herbie_product.model,
                product=herbie_product.product,
            )
            grib_path = herbie.download(herbie_product.search_text)
            grib_asset = AssetVibe(
                reference=str(grib_path), type="application/x-grib", id=gen_guid()
            )

            # Map each GRIB_ELEMENT tag to a zero-based band index (rasterio bands are 1-based).
            with rasterio.open(grib_path) as dataset:
                band_indices = {
                    dataset.tags(band)["GRIB_ELEMENT"]: band - 1  # type: ignore
                    for band in range(1, dataset.count + 1)  # type: ignore
                }

            # The output covers a single instant: initialization time plus lead time.
            valid_time = init_time + timedelta(hours=lead_hours)
            forecast = Grib.clone_from(
                herbie_product,
                time_range=(valid_time, valid_time),
                id=gen_guid(),
                assets=[grib_asset],
                meta={"lead_time": str(lead_hours)},
                bands=band_indices,
            )

            return {"forecast": forecast}

        return download_herbie

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Split the input time range into one zero-length `DataVibe` per forecast date."""

    def __init__(
        self,
        forecast_lead_times: List[int],
        weather_type: str,
    ):
        self.weather_type = weather_type
        # Spacing (in hours) between generated dates: second entry minus first entry.
        first_lead, second_lead = forecast_lead_times[0], forecast_lead_times[1]
        self.frequency = second_lead - first_lead

    def get_forecast_weather(self, user_input: DataVibe) -> List[DataVibe]:
        """Return one `DataVibe` per date between the input's start and end times.

        Dates are `self.frequency` hours apart; each output keeps the input geometry,
        has no assets, and uses `(date, date)` as its time range.
        """
        geometry = user_input.geometry
        step = f"{str(self.frequency)}H"

        forecasts: List[DataVibe] = []
        for date in pd.date_range(user_input.time_range[0], user_input.time_range[1], freq=step):
            instant = (date, date)
            forecast_id = gen_hash_id(
                name=self.weather_type,
                geometry=geometry,
                time_range=instant,
            )
            forecasts.append(DataVibe(forecast_id, instant, geometry, []))

        return forecasts

    def __call__(self):
        def range_split_initialize(user_input: List[DataVibe]) -> Dict[str, List[DataVibe]]:
            # Only the first element of the input list is used.
            return {"download_period": self.get_forecast_weather(user_input[0])}

        return range_split_initialize
class CallbackBuilder:
    """Build the op callback that downloads point forecasts with Herbie into a CSV asset.

    `forecast_lead_times` is read as `[start, stop, step]` and expanded with `range`.
    `stop - start` is also used, in hours, as the spacing between the forecast dates
    generated from the input time range.
    """

    def __init__(
        self,
        model: str,
        overwrite: bool,
        product: str,
        forecast_lead_times: List[int],
        search_text: str,
        weather_type: str,
    ):
        # Holds the generated CSV; removed in __del__.
        self.temp_dir = TemporaryDirectory()
        self.model = model
        self.overwrite = overwrite
        self.product = product
        self.forecast_lead_times = range(
            forecast_lead_times[0], forecast_lead_times[1], forecast_lead_times[2]
        )
        self.frequency = forecast_lead_times[1] - forecast_lead_times[0]
        self.search_text = search_text
        self.weather_type = weather_type

    def _has_all_lead_times(self, values) -> bool:
        """Return True when `values` holds at least one entry per requested lead time."""
        # Compare against the number of lead times, not `self.frequency`: the two only
        # coincide when the lead-time step is 1, and the CSV has one column per lead time.
        return len(values) >= len(self.forecast_lead_times)

    def ping_herbie_source(self, date: datetime, coordinates: Tuple[float, float]):
        """Download the forecast initialized at `date` and return the values nearest to
        `coordinates`, one per lead time.

        Returns an empty array when the data is missing (`EOFError`), the connection
        fails (`SocketError`), or fewer values than lead times are returned. Any other
        exception propagates to the caller.
        """
        # Per-call scratch directory for Herbie, removed in the `finally` below.
        tmp_dir = tempfile.mkdtemp()
        out_ = np.empty(0)
        try:
            # download forecast data
            fh = FastHerbie(
                [date],
                model=self.model,
                product=self.product,
                fxx=self.forecast_lead_times,
                save_dir=tmp_dir,
                overwrite=self.overwrite,
            )
            fh.download(searchString=self.search_text)

            # filter records nearest to coordinates
            ds = fh.xarray(searchString=self.search_text)

            out_key = [key for key in ds.keys() if key != "gribfile_projection"]
            out_ = ds.herbie.nearest_points(coordinates)[out_key[0]].values[0]

            if not self._has_all_lead_times(out_):
                out_ = np.empty(0)

            del ds
            del fh
        except (EOFError, SocketError):
            # EOFError is raised when data is missing; both errors are deliberately
            # ignored so the remaining dates can still be downloaded.
            out_ = np.empty(0)
        finally:
            # clear temporary directory
            shutil.rmtree(tmp_dir, ignore_errors=True)

        # Returned after (not inside) the `finally`, so unexpected exceptions are not
        # silently discarded.
        return out_

    def get_forecast_weather(self, user_input: DataVibe) -> WeatherVibe:
        """Collect forecasts for every date in the input time range into a CSV asset.

        Dates for which no complete forecast could be retrieved are left out of the table.
        """
        start_date = user_input.time_range[0].replace(tzinfo=None)
        end_date = user_input.time_range[1].replace(tzinfo=None)
        coords = tuple(user_input.geometry["coordinates"])
        dates = pd.date_range(start_date, end_date, freq=f"{str(self.frequency)}H")

        forecasts = []
        for date in dates:
            out_ = self.ping_herbie_source(date=date, coordinates=coords)
            if len(out_) > 0:
                forecasts.append([date] + list(out_))

        # One row per forecast date, one column per lead time.
        df = pd.DataFrame(
            data=forecasts,
            columns=[INDEX_COLUMN] + [f"step {x}" for x in self.forecast_lead_times],
        )

        out_path = os.path.join(self.temp_dir.name, f"{self.weather_type}.csv")
        df.to_csv(out_path, index=False)
        asset = AssetVibe(reference=out_path, type="text/csv", id=gen_guid())
        return WeatherVibe(
            gen_guid(),
            user_input.time_range,
            user_input.geometry,
            [asset],
        )

    def __call__(self):
        def weather_initialize(user_input: DataVibe) -> Dict[str, WeatherVibe]:
            weather_forecast = self.get_forecast_weather(user_input)
            return {"weather_forecast": weather_forecast}

        return weather_initialize

    def __del__(self):
        self.temp_dir.cleanup()
search_text: "TMP:2 m" + weather_type: "temperature" +entrypoint: + callback_builder: CallbackBuilder + file: forecast_weather.py +dependencies: + parameters: + - search_text + - weather_type + - forecast_lead_times +description: + short_description: Downloads forecast observations with Herbie. \ No newline at end of file diff --git a/ops/download_landsat_from_pc/download_landsat_from_pc.yaml b/ops/download_landsat_from_pc/download_landsat_from_pc.yaml new file mode 100644 index 00000000..011a1371 --- /dev/null +++ b/ops/download_landsat_from_pc/download_landsat_from_pc.yaml @@ -0,0 +1,12 @@ +name: download_landsat_from_pc +inputs: + landsat_product: LandsatProduct +output: + downloaded_product: LandsatProduct +parameters: + api_key: "" +entrypoint: + file: download_landsat_pc.py + callback_builder: CallbackBuilder +description: + short_description: Downloads LANDSAT tile bands from product. \ No newline at end of file diff --git a/ops/download_landsat_from_pc/download_landsat_pc.py b/ops/download_landsat_from_pc/download_landsat_pc.py new file mode 100644 index 00000000..3778b948 --- /dev/null +++ b/ops/download_landsat_from_pc/download_landsat_pc.py @@ -0,0 +1,48 @@ +import logging +from tempfile import TemporaryDirectory +from typing import Dict, Optional + +import planetary_computer as pc + +from vibe_core.data import LandsatProduct, gen_hash_id +from vibe_lib.planetary_computer import LandsatCollection + +LOGGER = logging.getLogger(__name__) + + +class CallbackBuilder: + def __init__(self, api_key: str): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def download_product( + landsat_product: LandsatProduct, + ) -> Dict[str, Optional[LandsatProduct]]: + pc.set_subscription_key(self.api_key) + collection = LandsatCollection() + item = collection.query_by_id(landsat_product.tile_id) + + downloaded_product = LandsatProduct.clone_from( + landsat_product, + id=gen_hash_id( + f"{landsat_product.id}_download_landsat_product", + 
class CallbackBuilder:
    """Create the op callback that downloads MODIS surface reflectance bands as one raster."""

    def __init__(self, qa_mask_value: int, pc_key: Optional[str]):
        # Downloaded tiffs and the stacked raster are written here; removed in __del__.
        self.tmp_dir = TemporaryDirectory()
        # Bit mask applied to the state band; a falsy value disables masking.
        self.qa_mask_value = qa_mask_value
        pc.set_subscription_key(pc_key)  # type: ignore

    def __call__(self):
        def callback(product: ModisProduct) -> Dict[str, ModisRaster]:
            collection = Modis8DaySRCollection(product.resolution)
            items = collection.query(
                roi=product.bbox,
                time_range=product.time_range,
                ids=[product.id],
            )
            assert len(items) == 1
            item = items[0]

            # All surface reflectance assets, in sorted order; this order defines band indices.
            bands = sorted(key for key in item.assets if "sur_refl" in key)
            tifs = []
            for band in bands:
                tifs.append(collection.download_asset(item.assets[band], self.tmp_dir.name))

            dataset = xr.open_mfdataset(
                tifs, engine="rasterio", combine="nested", concat_dim="bands"
            )
            da = dataset.to_array().squeeze()

            if self.qa_mask_value:
                # Position of the first state band, if there is one.
                state_idx = next(
                    (pos for pos, band in enumerate(bands) if "sur_refl_state_" in band),
                    None,
                )
                if state_idx is None:
                    raise ValueError("sur_refl_state not found")
                qa_pixel = rio.open_rasterio(tifs[state_idx]).squeeze().values.astype(int)  # type: ignore
                # Keep only the pixels where none of the masked bits are set.
                mask = np.logical_not(np.bitwise_and(qa_pixel, self.qa_mask_value))
                del qa_pixel
                da = da.where(mask)

            asset = save_raster_to_asset(da, self.tmp_dir.name)

            band_idx = {name: pos for pos, name in enumerate(bands)}
            # Add Spyndex aliases to available bands
            for modis_name, alias in MODIS_SPYNDEX.items():
                if modis_name in bands:
                    band_idx[alias] = band_idx[modis_name]

            raster = ModisRaster.clone_from(
                product,
                id=gen_guid(),
                assets=[asset],
                bands=band_idx,
            )
            return {"raster": raster}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Create the op callback that downloads one vegetation index raster for a MODIS product."""

    def __init__(self, index: str, pc_key: Optional[str]):
        """Store the op parameters.

        Raises:
            ValueError: if `index` is not listed in `VALID_INDICES`.
        """
        # Downloaded rasters are written here; removed in __del__.
        self.tmp_dir = TemporaryDirectory()
        index_is_valid = index in VALID_INDICES
        if not index_is_valid:
            raise ValueError(f"Expected index to be one of {VALID_INDICES}, got '{index}'.")
        self.index = index
        pc.set_subscription_key(pc_key)  # type: ignore

    def __call__(self):
        def callback(product: ModisProduct) -> Dict[str, Raster]:
            collection = Modis16DayVICollection(product.resolution)
            items = collection.query(
                roi=product.bbox,
                time_range=product.time_range,
                ids=[product.id],
            )
            assert len(items) == 1
            item = items[0]

            # Asset keys carry the index name in upper case; exactly one must match.
            matching = [
                candidate for key, candidate in item.assets.items() if self.index.upper() in key
            ]
            assert len(matching) == 1

            local_path = collection.download_asset(matching[0], self.tmp_dir.name)
            index_asset = AssetVibe(reference=local_path, type="image/tiff", id=gen_guid())
            index_raster = Raster.clone_from(
                product, id=gen_guid(), assets=[index_asset], bands={self.index: 0}
            )
            return {"index": index_raster}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
+ pc_key: +dependencies: + parameters: + - index +entrypoint: + file: download_modis_vegetation.py + callback_builder: CallbackBuilder +description: + short_description: Downloads selected index raster from Modis product. \ No newline at end of file diff --git a/ops/download_modis_vegetation/test_download_modis_vegetation.py b/ops/download_modis_vegetation/test_download_modis_vegetation.py new file mode 100644 index 00000000..8c1079ff --- /dev/null +++ b/ops/download_modis_vegetation/test_download_modis_vegetation.py @@ -0,0 +1,76 @@ +import os +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from pystac import Asset, Item +from shapely import geometry as shpg + +from vibe_core.data import ModisProduct, Raster +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import Modis16DayVICollection + +HERE = os.path.dirname(os.path.abspath(__file__)) +INDICES = ("ndvi", "evi") +FAKE_TIME_RANGE = (datetime(2020, 11, 1), datetime(2020, 11, 2)) +INVALID_INDEX = "🙅" + + +def fake_items(resolution: int): + assets = { + f"250m_16_days_{index.upper()}": Asset(href=f"fake_href_{resolution}_{index}") + for index in INDICES + } + return [ + Item( + id=f"{resolution}m-id", # type: ignore + geometry=None, + bbox=None, + datetime=None, + properties={ + "start_datetime": FAKE_TIME_RANGE[0].isoformat() + "Z", + "end_datetime": FAKE_TIME_RANGE[1].isoformat() + "Z", + }, + assets=assets, + ) + ] + + +@pytest.mark.parametrize("resolution", (250, 500)) +@pytest.mark.parametrize("index", ("ndvi", "evi")) +@patch("vibe_lib.planetary_computer.get_available_collections") +@patch.object(Modis16DayVICollection, "download_asset") +@patch.object(Modis16DayVICollection, "query") +def test_op( + query: MagicMock, + download_asset: MagicMock, + get_collections: MagicMock, + index: str, + resolution: int, +): + get_collections.return_value = list(Modis16DayVICollection.collections.values()) + items = fake_items(resolution) + 
class CallbackBuilder:
    """Create the op callback that downloads a NAIP tile and wraps it as a `NaipRaster`."""

    def __init__(self, api_key: str):
        # Downloaded files are written here; removed in __del__.
        self.tmp_dir = TemporaryDirectory()
        self.api_key = api_key

    def __call__(self):
        def op(input_product: NaipProduct) -> Dict[str, NaipRaster]:
            pc.set_subscription_key(self.api_key)
            naip = NaipCollection()
            item = naip.query_by_id(input_product.tile_id)

            # Each product gets its own sub-directory, named after the product id.
            download_dir = os.path.join(self.tmp_dir.name, input_product.id)
            vibe_assets = []
            for path in naip.download_item(item, download_dir):
                mime_type = cast(str, mimetypes.guess_type(path)[0])
                vibe_assets.append(AssetVibe(reference=path, type=mime_type, id=gen_guid()))

            # Extra JSON asset listing the first three band indices.
            vibe_assets.append(json_to_asset({"bands": [0, 1, 2]}, self.tmp_dir.name))

            product_id = gen_hash_id(
                f"{input_product.id}_download_naip_product",
                input_product.geometry,
                input_product.time_range,
            )
            downloaded_product = NaipRaster(
                id=product_id,
                time_range=input_product.time_range,
                geometry=input_product.geometry,
                assets=vibe_assets,
                bands={"red": 0, "green": 1, "blue": 2, "nir": 3},
                tile_id=input_product.tile_id,
                year=input_product.year,
                resolution=input_product.resolution,
            )

            return {"downloaded_product": downloaded_product}

        return op

    def __del__(self):
        self.tmp_dir.cleanup()
@patch(
    "vibe_lib.planetary_computer.get_available_collections",
    return_value=[NaipCollection.collection],
)
@patch.object(NaipCollection, "query_by_id")
@patch.object(NaipCollection, "download_item", return_value=["/tmp/test.tif"])
def test_op(_: MagicMock, __: MagicMock, ___: MagicMock):
    """Run download_naip with the Planetary Computer access patched; expect a NaipRaster."""
    # Small square region around a single point.
    latitude = 42.21422
    longitude = -93.22890
    buffer = 0.001
    region: Polygon = box(
        longitude - buffer,
        latitude - buffer,
        longitude + buffer,
        latitude + buffer,
        ccw=True,
    )

    tile = "ia_m_4209355_nw_15_060_20190730_20191105"
    time_range = (
        datetime(year=2018, month=2, day=1, tzinfo=timezone.utc),
        datetime(year=2021, month=2, day=11, tzinfo=timezone.utc),
    )
    naip_product: NaipProduct = NaipProduct(
        id=str(tile),
        time_range=time_range,
        geometry=mapping(region),  # type: ignore
        assets=[],
        tile_id=str(tile),
        resolution=0.6,
        year=2019,
    )

    output_data = OpTester(CONFIG_PATH).run(input_product=naip_product)

    # Get op result
    output_name = "downloaded_product"
    assert output_name in output_data
    assert isinstance(output_data[output_name], NaipRaster)
b/ops/download_road_geometries/download_road_geometries.py new file mode 100644 index 00000000..0c30c766 --- /dev/null +++ b/ops/download_road_geometries/download_road_geometries.py @@ -0,0 +1,62 @@ +import os +from tempfile import TemporaryDirectory +from typing import Dict, cast + +import geopandas as gpd +import osmnx as ox +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, GeometryCollection +from vibe_core.data.core_types import AssetVibe, gen_guid +from vibe_lib.geometry import wgs_to_utm + + +def get_road_geometries(geom: shpg.Polygon, network_type: str) -> gpd.GeoDataFrame: + graph = ox.graph_from_polygon( + geom, network_type=network_type, truncate_by_edge=True, retain_all=True + ) + df_edges = cast(gpd.GeoDataFrame, ox.graph_to_gdfs(graph, nodes=False)) + df_edges = cast(gpd.GeoDataFrame, df_edges[df_edges.intersects(geom)]) + # Encode Metadata as strings to avoid lists + for k in df_edges.columns: + if k == "geometry": + continue + df_edges[k] = df_edges[k].apply( # type: ignore + lambda x: ",".join([str(i) for i in x]) if isinstance(x, list) else str(x) + ) + return cast(gpd.GeoDataFrame, df_edges) + + +class CallbackBuilder: + def __init__(self, network_type: str, buffer_size: float): + self.network_type = network_type + self.buffer_size = buffer_size + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def callback(input_region: DataVibe) -> Dict[str, GeometryCollection]: + geom = shpg.box(*input_region.bbox) + crs = "epsg:4326" + proj_crs = f"epsg:{wgs_to_utm(geom)}" + buffered_geom = ( + gpd.GeoSeries(geom, crs=crs) + .to_crs(proj_crs) + .buffer(self.buffer_size) + .to_crs(crs=crs) + .iloc[0] + .envelope + ) + df = get_road_geometries(buffered_geom, self.network_type) + guid = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{guid}.gpkg") + df.to_file(filepath, driver="GPKG") + asset = AssetVibe(reference=filepath, type="application/geopackage+sqlite3", id=guid) + + out = 
GeometryCollection.clone_from(input_region, id=gen_guid(), assets=[asset]) + + return {"roads": out} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_road_geometries/download_road_geometries.yaml b/ops/download_road_geometries/download_road_geometries.yaml new file mode 100644 index 00000000..ebb4595a --- /dev/null +++ b/ops/download_road_geometries/download_road_geometries.yaml @@ -0,0 +1,17 @@ +name: download_road_geometries +inputs: + input_region: DataVibe +output: + roads: GeometryCollection +parameters: + network_type: all_private + buffer_size: 100 # In meters +entrypoint: + file: download_road_geometries.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - network_type + - buffer_size +description: + short_description: Downloads road geometry for input region from OpenStreetMap. \ No newline at end of file diff --git a/ops/download_sentinel1/download_sentinel1.yaml b/ops/download_sentinel1/download_sentinel1.yaml new file mode 100644 index 00000000..0a822e70 --- /dev/null +++ b/ops/download_sentinel1/download_sentinel1.yaml @@ -0,0 +1,27 @@ +name: download_sentinel1 +inputs: + sentinel_product: Sentinel1Product +output: + downloaded_product: Sentinel1Raster +parameters: + api_key: "" + block_size: 2048 + num_workers: 20 + timeout_s: 120 +entrypoint: + file: download_sentinel1_rtc.py + callback_builder: CallbackBuilder +description: + short_description: Downloads the Sentinel-1 RTC product bands. + long_description: + The op will read the bands from the Planetary Computer and stack them into a single 2-band TIFF + file. + inputs: + sentinel_product: Product to be downloaded. + output: + downloaded_product: Downloaded product with an asset that contains both Sentinel-1 RTC bands. + parameters: + api_key: Planetary Computer API key. + block_size: Size of blocks that are read by each thread. + num_workers: How many threads to use when reading data from the Planetary Computer blobs. 
+ timeout_s: Maximum time, in seconds, before a band reading operation times out. diff --git a/ops/download_sentinel1/download_sentinel1_rtc.py b/ops/download_sentinel1/download_sentinel1_rtc.py new file mode 100644 index 00000000..d5dea014 --- /dev/null +++ b/ops/download_sentinel1/download_sentinel1_rtc.py @@ -0,0 +1,76 @@ +import logging +import os +from concurrent.futures import TimeoutError +from tempfile import TemporaryDirectory +from typing import Dict + +import planetary_computer as pc +import rasterio +from pystac import Item +from rasterio.enums import Resampling +from rasterio.windows import Window + +from vibe_core.data import AssetVibe, Sentinel1Product, Sentinel1Raster, gen_guid +from vibe_lib.planetary_computer import Sentinel1RTCCollection +from vibe_lib.raster import FLOAT_COMPRESSION_KWARGS, get_profile_from_ref, serial_stack_bands + +LOGGER = logging.getLogger(__name__) + + +def read_block(raster_url: str, win: Window): + with rasterio.open(raster_url) as src: + return src.read(window=win), win + + +class CallbackBuilder: + def __init__(self, api_key: str, num_workers: int, block_size: int, timeout_s: float): + self.api_key = api_key + self.num_workers = num_workers + self.block_size = block_size + self.timeout_s = timeout_s + self.tmp_dir = TemporaryDirectory() + + def stack_bands(self, col: Sentinel1RTCCollection, item: Item) -> AssetVibe: + asset_guid = gen_guid() + out_path = os.path.join(self.tmp_dir.name, f"{asset_guid}.tif") + LOGGER.debug(f"Downloading Sentinel-1 RTC bands for product {item.id}") + band_hrefs = col.download_item(item, os.path.join(self.tmp_dir.name, item.id)) + LOGGER.debug(f"Done downloading Sentinel-1 RTC bands for product {item.id}") + kwargs = get_profile_from_ref( + band_hrefs[0], count=len(band_hrefs), **FLOAT_COMPRESSION_KWARGS + ) + LOGGER.debug(f"Stacking Sentinel-1 RTC bands for product {item.id}") + serial_stack_bands( + band_hrefs, + out_path, + (self.block_size, self.block_size), + Resampling.bilinear, + 
**kwargs, + ) + LOGGER.debug(f"Done stacking Sentinel-1 RTC bands for product {item.id}") + return AssetVibe(reference=out_path, type="image/tiff", id=asset_guid) + + def __call__(self): + def callback(sentinel_product: Sentinel1Product) -> Dict[str, Sentinel1Raster]: + pc.set_subscription_key(self.api_key) + col = Sentinel1RTCCollection() + item = pc.sign(col.query_by_id(sentinel_product.id)) + try: + asset = self.stack_bands(col, item) + except TimeoutError as e: + raise TimeoutError( + f"Timeout while stacking bands for products {sentinel_product.product_name}" + ) from e + raster = Sentinel1Raster.clone_from( + sentinel_product, + sentinel_product.id, + assets=[asset], + bands={k.upper(): i for i, k in enumerate(col.asset_keys)}, + tile_id="", + ) + return {"downloaded_product": raster} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_sentinel1/test_download_sentinel1_rtc.py b/ops/download_sentinel1/test_download_sentinel1_rtc.py new file mode 100644 index 00000000..2e99bd64 --- /dev/null +++ b/ops/download_sentinel1/test_download_sentinel1_rtc.py @@ -0,0 +1,91 @@ +import os +from datetime import datetime +from pathlib import Path +from unittest.mock import Mock, patch + +import numpy as np +import planetary_computer as pc +import pytest +import rasterio +from pystac import Asset, Item +from shapely import geometry as shpg + +from vibe_core.data import Sentinel1Product, Sentinel1Raster +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import Sentinel1RTCCollection + +HERE = os.path.dirname(os.path.abspath(__file__)) +CONFIG_PATH = os.path.join(HERE, "download_sentinel1.yaml") + +IMG_SIZE = 100 + + +@pytest.fixture +def fake_item(tmp_path: Path): + assets = {} + for i, band in enumerate(("vh", "vv"), start=1): + band_path = str(tmp_path / f"{band}.tif") + with rasterio.open( + band_path, + "w", + driver="GTiff", + count=1, + width=IMG_SIZE, + height=IMG_SIZE, + dtype="float32", + 
nodata=0, + ) as dst: + dst.write(i * np.ones((1, IMG_SIZE, IMG_SIZE))) + assets[band] = Asset(href=band_path) + + return Item( + id="1", + geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), + bbox=None, + datetime=datetime.now(), + properties={}, + assets=assets, + ) + + +@patch.object(pc, "sign") +@patch.object(Sentinel1RTCCollection, "download_item") +@patch.object(Sentinel1RTCCollection, "query_by_id") +@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["sentinel-1-rtc"]) +def test_op( + collections_mock: Mock, query_mock: Mock, download_mock: Mock, sign_mock: Mock, fake_item: Item +): + query_mock.return_value = fake_item + download_mock.return_value = [fake_item.assets["vh"].href, fake_item.assets["vv"].href] + sign_mock.side_effect = lambda x: x + geom = shpg.box(0, 0, 1, 1) + fake_input = Sentinel1Product( + id="1", + time_range=(datetime.now(), datetime.now()), + geometry=shpg.mapping(geom), + assets=[], + product_name="product_name", + orbit_number=0, + relative_orbit_number=0, + orbit_direction="", + platform="", + extra_info={}, + sensor_mode="", + polarisation_mode="", + ) + + op_tester = OpTester(CONFIG_PATH) + op_tester.update_parameters({"num_workers": 1}) + out = op_tester.run(sentinel_product=fake_input) + key = "downloaded_product" + assert key in out + product = out[key] + assert isinstance(product, Sentinel1Raster) + assert product.time_range == fake_input.time_range + assert product.geometry == fake_input.geometry + with rasterio.open(product.raster_asset.local_path) as src: + profile = src.profile + ar = src.read() + assert profile["dtype"] == "float32" + assert profile["nodata"] == 0.0 + assert ar.shape == (2, IMG_SIZE, IMG_SIZE) diff --git a/ops/download_sentinel1_grd/download_sentinel1_grd.py b/ops/download_sentinel1_grd/download_sentinel1_grd.py new file mode 100644 index 00000000..6e06c65e --- /dev/null +++ b/ops/download_sentinel1_grd/download_sentinel1_grd.py @@ -0,0 +1,88 @@ +import logging +import os +import 
shutil +import time +from tempfile import TemporaryDirectory +from typing import Final, cast + +import planetary_computer as pc +from requests import RequestException + +from vibe_core.data import DownloadedSentinel1Product, Sentinel1Product +from vibe_core.file_downloader import download_file +from vibe_lib.planetary_computer import ( + get_complete_s1_prefix, + get_sentinel1_container_client, + get_sentinel1_scene_files, +) + +RETRY_WAIT: Final[int] = 10 +MAX_RETRIES: Final[int] = 5 +LOGGER: Final[logging.Logger] = logging.getLogger(__name__) +READ_TIMEOUT_S: Final[int] = 90 +MAX_CONCURRENCY: Final[int] = 3 + + +def download_from_blob(item: Sentinel1Product, save_path: str) -> str: + container_client = get_sentinel1_container_client() + scene_files = get_sentinel1_scene_files(item) + LOGGER.debug(f"Obtained {len(scene_files)} scene files for product '{item.product_name}'") + + if not scene_files: + # No scene files found! + raise RuntimeError( + f"Failed to download sentinel 1 product {item.product_name}, no scene files found." + ) + + blob_prefix = get_complete_s1_prefix(scene_files) + LOGGER.debug(f"Obtained blob prefix '{blob_prefix}' for product name '{item.product_name}'") + product_name = blob_prefix.split("/")[-1] + + zip_name = os.path.join(save_path, product_name) + base_dir = f"{zip_name}.SAFE" + + LOGGER.debug(f"Downloading scene files for product '{item.product_name}'") + for blob in scene_files: + out_path = os.path.join(base_dir, os.path.relpath(cast(str, blob.name), blob_prefix)) + save_dir = os.path.dirname(out_path) + os.makedirs(save_dir, exist_ok=True) + for retry in range(MAX_RETRIES): + try: + url = container_client.get_blob_client(blob.name).url + download_file(url, out_path) + break + except RequestException as e: + LOGGER.warning( + f"Exception {e} downloading from blob {blob.name}." + f" Retrying after {RETRY_WAIT}s ({retry+1}/{MAX_RETRIES})." 
+ ) + time.sleep(RETRY_WAIT) + else: + raise RuntimeError(f"Failed asset {blob.name} after {MAX_RETRIES} retries.") + LOGGER.debug(f"Making zip archive '{zip_name}' for root dir '{save_path}'") + zip_path = shutil.make_archive( + zip_name, "zip", root_dir=save_path, base_dir=f"{product_name}.SAFE" + ) + return zip_path + + +class CallbackBuilder: + def __init__(self, api_key: str): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def download_sentinel1_from_pc(sentinel_product: Sentinel1Product): + pc.set_subscription_key(self.api_key) + save_path = os.path.join(self.tmp_dir.name, sentinel_product.id) + zip_path = download_from_blob(sentinel_product, save_path) + new_item = DownloadedSentinel1Product.clone_from( + sentinel_product, sentinel_product.id, assets=[] + ) + new_item.add_zip_asset(zip_path) + return {"downloaded_product": new_item} + + return download_sentinel1_from_pc + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_sentinel1_grd/download_sentinel1_grd.yaml b/ops/download_sentinel1_grd/download_sentinel1_grd.yaml new file mode 100644 index 00000000..7cfc47cb --- /dev/null +++ b/ops/download_sentinel1_grd/download_sentinel1_grd.yaml @@ -0,0 +1,12 @@ +name: download_sentinel_1_grd +inputs: + sentinel_product: Sentinel1Product +output: + downloaded_product: DownloadedSentinel1Product +parameters: + api_key: "" +entrypoint: + file: download_sentinel1_grd.py + callback_builder: CallbackBuilder +description: + short_description: Downloads Sentinel-1 GRD products. 
\ No newline at end of file diff --git a/ops/download_sentinel1_grd/test_download_sentinel1.py b/ops/download_sentinel1_grd/test_download_sentinel1.py new file mode 100644 index 00000000..e1d14290 --- /dev/null +++ b/ops/download_sentinel1_grd/test_download_sentinel1.py @@ -0,0 +1,87 @@ +import os +from datetime import datetime +from unittest.mock import Mock, patch +from zipfile import ZipFile + +import pytest +from shapely import geometry as shpg + +from vibe_core.data import DownloadedSentinel1Product, Sentinel1Product +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import generate_sentinel1_blob_path + +HERE = os.path.dirname(os.path.abspath(__file__)) +CONFIG_PATH = os.path.join(HERE, "download_sentinel1_grd.yaml") +FULL_PRODUCT_NAME = "S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0" + + +class MockBlob: + def __init__(self, name: str): + self.name = name + + def __getitem__(self, key: str): + return getattr(self, key) + + +def fake_download(_, file_path: str): + with open(os.path.join(file_path), "w") as f: + f.write("🌎") + + +@pytest.mark.parametrize("product_name", ("complete", "incomplete")) +@patch("vibe_core.file_downloader.download_file") +@patch("vibe_lib.planetary_computer.get_sentinel1_scene_name") +@patch("vibe_lib.planetary_computer.get_sentinel1_scene_files") +@patch("vibe_lib.planetary_computer.get_sentinel1_container_client") +def test_op( + get_s1_client: Mock, + s1_scene_files: Mock, + s1_scene_name: Mock, + download_file: Mock, + product_name: str, +): + s1_scene_name.return_value = FULL_PRODUCT_NAME + download_file.side_effect = fake_download + geom = shpg.box(0, 0, 1, 1) + fake_input = Sentinel1Product( + id="1", + time_range=(datetime.now(), datetime.now()), + geometry=shpg.mapping(geom), + assets=[], + product_name=FULL_PRODUCT_NAME, + orbit_number=0, + relative_orbit_number=0, + orbit_direction="", + platform="", + extra_info={}, + sensor_mode="", + polarisation_mode="", + ) + 
blob_path = generate_sentinel1_blob_path(fake_input) + s1_scene_files.return_value = [ + MockBlob(f"{blob_path}/fake.txt"), + MockBlob(f"{blob_path}/fake_dir/fake2.txt"), + ] + op_tester = OpTester(CONFIG_PATH) + if product_name == "incomplete": + fake_input.product_name = FULL_PRODUCT_NAME[:-4] + out = op_tester.run(sentinel_product=fake_input) + key = "downloaded_product" + assert key in out + product = out[key] + assert isinstance(product, DownloadedSentinel1Product) + zip_path = product.get_zip_asset().local_path + assert os.path.basename(zip_path) == f"{FULL_PRODUCT_NAME}.zip" + base_dir = f"{FULL_PRODUCT_NAME}.SAFE" + with ZipFile(zip_path) as zf: + il = zf.infolist() + assert len(il) == 4 + assert f"{base_dir}/" == il[0].filename + assert f"{base_dir}/fake_dir/" == il[1].filename + assert f"{base_dir}/fake.txt" == il[2].filename + assert f"{base_dir}/fake_dir/fake2.txt" == il[3].filename + with zf.open(il[2]) as f: + content = f.read() + with zf.open(il[3]) as f: + content2 = f.read() + assert content.decode("utf-8") == content2.decode("utf-8") == "🌎" diff --git a/ops/download_sentinel2_from_pc/download_s2_pc.py b/ops/download_sentinel2_from_pc/download_s2_pc.py new file mode 100644 index 00000000..c327ffc0 --- /dev/null +++ b/ops/download_sentinel2_from_pc/download_s2_pc.py @@ -0,0 +1,76 @@ +import logging +import os +import re +from tempfile import TemporaryDirectory +from typing import Dict, Optional + +import planetary_computer as pc +from azure.storage.blob import BlobClient + +from vibe_core.data import gen_guid +from vibe_core.data.sentinel import DownloadedSentinel2Product, Sentinel2Product, discriminator_date +from vibe_core.file_downloader import download_file +from vibe_lib.planetary_computer import Sentinel2Collection + +LOGGER = logging.getLogger(__name__) + + +def get_partial_id(product_id: str) -> str: + return "_".join(re.sub(r"_N[\d]{4}_", "_", product_id).split("_")[:-1]) + + +class CallbackBuilder: + def __init__(self, api_key: str): + 
self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + + def __call__(self): + def download_product( + sentinel_product: Sentinel2Product, + ) -> Dict[str, Optional[DownloadedSentinel2Product]]: + pc.set_subscription_key(self.api_key) + collection = Sentinel2Collection() + items = collection.query( + roi=sentinel_product.bbox, time_range=sentinel_product.time_range + ) + partial_id = get_partial_id(sentinel_product.product_name) + matches = [item for item in items if get_partial_id(item.id) == partial_id] + if not matches: + raise RuntimeError( + f"Could not find matches for sentinel 2 product " + f"{sentinel_product.product_name}" + ) + if len(matches) > 1: + matches = sorted(matches, key=lambda x: discriminator_date(x.id), reverse=True) + LOGGER.warning( + f"Found {len(matches)} > 1 matches for product " + f"{sentinel_product.product_name}: {', '.join([m.id for m in matches])}. " + f"Picking newest one ({matches[0].id})." + ) + + item = matches[0] + downloaded_product = DownloadedSentinel2Product.clone_from( + sentinel_product, sentinel_product.id, [] + ) + # Adding bands + for k in collection.asset_keys: # where actual download happens + asset_path = collection.download_asset(item.assets[k], self.tmp_dir.name) + downloaded_product.add_downloaded_band(k, asset_path) + + # Adding cloud mask + gml_out_path = os.path.join(self.tmp_dir.name, f"{gen_guid()}.gml") + mask_pc_path = collection.get_cloud_mask(item) + if BlobClient.from_blob_url(mask_pc_path).exists(): + download_file(mask_pc_path, gml_out_path) + downloaded_product.add_downloaded_cloudmask(gml_out_path) + else: + LOGGER.warning( + f"GML file is not available for product {sentinel_product.product_name}" + ) + + return {"downloaded_product": downloaded_product} + + return download_product + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_sentinel2_from_pc/download_sentinel2_from_pc.yaml b/ops/download_sentinel2_from_pc/download_sentinel2_from_pc.yaml new file mode 100644 
index 00000000..8788e93f --- /dev/null +++ b/ops/download_sentinel2_from_pc/download_sentinel2_from_pc.yaml @@ -0,0 +1,12 @@ +name: download_sentinel2_from_pc +inputs: + sentinel_product: Sentinel2Product +output: + downloaded_product: DownloadedSentinel2Product +parameters: + api_key: "" +entrypoint: + file: download_s2_pc.py + callback_builder: CallbackBuilder +description: + short_description: Downloads Sentinel-2 products. \ No newline at end of file diff --git a/ops/download_soilgrids/download_soilgrids.py b/ops/download_soilgrids/download_soilgrids.py new file mode 100644 index 00000000..c4a7adf2 --- /dev/null +++ b/ops/download_soilgrids/download_soilgrids.py @@ -0,0 +1,127 @@ +import logging +import mimetypes +import os +import time +from datetime import datetime +from tempfile import TemporaryDirectory +from typing import Dict, Final, List, cast + +from owslib.wcs import WebCoverageService + +from vibe_core.data import AssetVibe, DataVibe, Raster +from vibe_core.data.core_types import gen_guid, gen_hash_id + +LOGGER = logging.getLogger(__name__) + +MAX_RETRIES = 5 +RETRY_WAIT_S = 10 + + +class SoilGridsWS: + MAPS: Final[Dict[str, List[str]]] = { + "wrb": [ + "World Reference Base classes and probabilites", + "https://maps.isric.org/mapserv?map=/map/wrb.map", + ], + "bdod": ["Bulk density", "https://maps.isric.org/mapserv?map=/map/bdod.map"], + "cec": [ + "Cation exchange capacity at ph 7", + "https://maps.isric.org/mapserv?map=/map/cec.map", + ], + "cfvo": ["Coarse fragments volumetric", "https://maps.isric.org/mapserv?map=/map/cfvo.map"], + "clay": ["Clay content", "https://maps.isric.org/mapserv?map=/map/clay.map"], + "nitrogen": ["Nitrogen", "https://maps.isric.org/mapserv?map=/map/nitrogen.map"], + "phh2o": ["Soil pH in H2O", "https://maps.isric.org/mapserv?map=/map/phh2o.map"], + "sand": ["Sand content", "https://maps.isric.org/mapserv?map=/map/sand.map"], + "silt": ["Silt content", "https://maps.isric.org/mapserv?map=/map/silt.map"], + "soc": ["Soil 
organic carbon content", "https://maps.isric.org/mapserv?map=/map/soc.map"], + "ocs": ["Soil organic carbon stock", "https://maps.isric.org/mapserv?map=/map/ocs.map"], + "ocd": ["Organic carbon densities", "https://maps.isric.org/mapserv?map=/map/ocd.map"], + } + + def __init__(self, map: str): + self.map = map + try: + _, self.url = self.MAPS[map] + except KeyError: + raise ValueError( + f"Map {map} cannot be found. " + f"The maps available are: all {' '.join(self.MAPS.keys())}." + ) + for retry in range(MAX_RETRIES): + try: + self.wcs = WebCoverageService(self.url, version="2.0.1") # type: ignore + return + except Exception as e: + LOGGER.warning( + f"Exception {e} requesting from {self.url}." + f" Retrying after {RETRY_WAIT_S}s ({retry+1}/{MAX_RETRIES})" + ) + time.sleep(RETRY_WAIT_S) + raise RuntimeError(f"Failed request to {self.url} after {MAX_RETRIES} retries.") + + def get_ids(self) -> List[str]: + return list(self.wcs.contents) # type: ignore + + def download_id(self, id: str, tmpdir: str, input_item: DataVibe) -> Raster: + if id not in self.get_ids(): + raise ValueError( + f"Identifier {id} not found in {self.url}. 
Identifiers available" + f" are: {' '.join(self.get_ids())}" + ) + bbox = input_item.bbox + subsets = [("long", bbox[0], bbox[2]), ("lat", bbox[1], bbox[3])] + for retry in range(MAX_RETRIES): + try: + response = self.wcs.getCoverage( # type: ignore + identifier=[id], + subsets=subsets, + SUBSETTINGCRS="http://www.opengis.net/def/crs/EPSG/0/4326", + OUTPUTCRS="http://www.opengis.net/def/crs/EPSG/0/4326", + format="image/tiff", + ) + fpath = os.path.join(tmpdir, f"{id}_{gen_guid()}.tif") + with open(fpath, "wb") as file: + file.write(response.read()) + vibe_asset = AssetVibe( + reference=fpath, type=cast(str, mimetypes.guess_type(fpath)[0]), id=gen_guid() + ) + res = Raster( + id=gen_hash_id( + f"soilgrids_{self.map}_{id}", + input_item.geometry, + (datetime(2022, 1, 1), datetime(2022, 1, 1)), # dummy date + ), + time_range=input_item.time_range, + geometry=input_item.geometry, + assets=[vibe_asset], + bands={f"{self.map}:{id}": 0}, + ) + return res + except Exception as e: + LOGGER.warning( + f"Exception {e} downloading {id} from {self.url}." 
+ f" Retrying after {RETRY_WAIT_S}s ({retry+1}/{MAX_RETRIES})" + ) + time.sleep(RETRY_WAIT_S) + raise RuntimeError(f"Failed request for {id} in {self.url} after {MAX_RETRIES} retries.") + + +class CallbackBuilder: + def __init__(self, map: str, identifier: str): + self.tmp_dir = TemporaryDirectory() + self.map = map + self.identifier = identifier + + def __call__(self): + def download_soilgrids( + input_item: DataVibe, + ) -> Dict[str, Raster]: + sg = SoilGridsWS(self.map) + res = sg.download_id(self.identifier, self.tmp_dir.name, input_item) + return {"downloaded_raster": res} + + return download_soilgrids + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/download_soilgrids/download_soilgrids.yaml b/ops/download_soilgrids/download_soilgrids.yaml new file mode 100644 index 00000000..c52a1b55 --- /dev/null +++ b/ops/download_soilgrids/download_soilgrids.yaml @@ -0,0 +1,17 @@ +name: download_soilgrids +inputs: + input_item: DataVibe +output: + downloaded_raster: Raster +parameters: + map: soc + identifier: soc_0-5cm_mean +dependencies: + parameters: + - map + - identifier +entrypoint: + file: download_soilgrids.py + callback_builder: CallbackBuilder +description: + short_description: Downloads digital soil mapping information from SoilGrids for the input geometry. 
\ No newline at end of file diff --git a/ops/download_stack_sentinel2/download_stack_s2.py b/ops/download_stack_sentinel2/download_stack_s2.py new file mode 100644 index 00000000..381790fa --- /dev/null +++ b/ops/download_stack_sentinel2/download_stack_s2.py @@ -0,0 +1,190 @@ +import hashlib +import logging +import os +import re +from tempfile import TemporaryDirectory +from typing import Dict, Union + +import geopandas as gpd +import numpy as np +import planetary_computer as pc +from azure.storage.blob import BlobClient +from rasterio.enums import Resampling +from rasterio.features import rasterize + +from vibe_core.data import AssetVibe, gen_guid +from vibe_core.data.sentinel import ( + Sentinel2CloudMask, + Sentinel2Product, + Sentinel2Raster, + discriminator_date, +) +from vibe_lib.planetary_computer import Sentinel2Collection +from vibe_lib.raster import ( + INT_COMPRESSION_KWARGS, + get_profile_from_ref, + open_raster_from_ref, + serial_stack_bands, +) + +LOGGER = logging.getLogger(__name__) + +CLOUD_CATEGORIES = ["NO-CLOUD", "OPAQUE", "CIRRUS", "OTHER"] + +SENTINEL2_SPYNDEX: Dict[str, str] = { + "B01": "A", + "B02": "B", + "B03": "G", + "B04": "R", + "B05": "RE1", + "B06": "RE2", + "B07": "RE3", + "B08": "N", + "B8A": "N2", + "B09": "WV", + "B11": "S1", + "B12": "S2", +} + + +def get_partial_id(product_id: str) -> str: + return "_".join(re.sub(r"_N[\d]{4}_", "_", product_id).split("_")[:-1]) + + +def rasterize_clouds(cloud_gml_ref: str, ref_file: str, out_path: str, product_name: str) -> None: + """ + Rasterize cloud shapes and save compressed tiff file. 
+ """ + with open_raster_from_ref(ref_file) as src: + meta = src.meta + meta.update( + {"nodata": 100, "count": 1, "driver": "GTiff", "dtype": "uint8", **INT_COMPRESSION_KWARGS} + ) + out = np.zeros((meta["height"], meta["width"])) + try: + # The file might not exist, in this case we generate empty cloud masks (no clouds) + if BlobClient.from_blob_url(cloud_gml_ref).exists(): + df = gpd.read_file(cloud_gml_ref, WRITE_GFS="NO") + cloud_map = { + "OPAQUE": CLOUD_CATEGORIES.index("OPAQUE"), + "CIRRUS": CLOUD_CATEGORIES.index("CIRRUS"), + } + values = ( + df["maskType"] # type: ignore + .map(cloud_map) # type: ignore + .fillna(CLOUD_CATEGORIES.index("OTHER")) + ) + rasterize( + ((g, v) for g, v in zip(df["geometry"], values)), # type: ignore + out=out, + transform=meta["transform"], + ) + else: + LOGGER.debug( + f"Cloud GML file is not available for product {product_name}, generating empty mask" + ) + except ValueError: + # Empty file means no clouds + LOGGER.debug( + "ValueError when opening cloud GML file. 
Assuming there are no clouds and ignoring", + exc_info=True, + ) + pass + with open_raster_from_ref(out_path, "w", **meta) as dst: + dst.write(out, 1) + + +class CallbackBuilder: + def __init__(self, api_key: str, num_workers: int, block_size: int, timeout_s: float): + self.tmp_dir = TemporaryDirectory() + self.api_key = api_key + self.num_workers = num_workers + self.block_size = block_size + self.timeout_s = timeout_s + + def __call__(self): + def callback( + sentinel_product: Sentinel2Product, + ) -> Dict[str, Union[Sentinel2Raster, Sentinel2CloudMask]]: + pc.set_subscription_key(self.api_key) + collection = Sentinel2Collection() + items = collection.query( + roi=sentinel_product.bbox, time_range=sentinel_product.time_range + ) + partial_id = get_partial_id(sentinel_product.product_name) + matches = [item for item in items if get_partial_id(item.id) == partial_id] + if not matches: + raise RuntimeError( + f"Could not find matches for sentinel 2 product " + f"{sentinel_product.product_name}" + ) + if len(matches) > 1: + matches = sorted(matches, key=lambda x: discriminator_date(x.id), reverse=True) + LOGGER.warning( + f"Found {len(matches)} > 1 matches for product " + f"{sentinel_product.product_name}: {', '.join([m.id for m in matches])}. " + f"Picking newest one ({matches[0].id})." 
+ ) + + item = matches[0] + item = pc.sign(item) + LOGGER.debug( + f"Downloading Sentinel-2 bands for product {sentinel_product.product_name}" + ) + band_hrefs = collection.download_item( + item, os.path.join(self.tmp_dir.name, sentinel_product.product_name) + ) + LOGGER.debug( + f"Done downloading Sentinel-2 bands for product {sentinel_product.product_name}" + ) + tiff_args = get_profile_from_ref( + band_hrefs[collection.asset_keys.index("B02")], + count=len(band_hrefs), + nodata=0, + **INT_COMPRESSION_KWARGS, + ) + bands_id = gen_guid() + bands_path = os.path.join(self.tmp_dir.name, f"{bands_id}.tif") + LOGGER.debug(f"Stacking Sentinel-2 bands for product {sentinel_product.product_name}") + serial_stack_bands( + band_hrefs, + bands_path, + block_size=(self.block_size, self.block_size), + resampling=Resampling.bilinear, + **tiff_args, + ) + LOGGER.debug(f"Done stacking bands for product {sentinel_product.product_name}") + + # Adding cloud mask + mask_id = gen_guid() + mask_path = os.path.join(self.tmp_dir.name, f"{mask_id}.tif") + + rasterize_clouds( + collection.get_cloud_mask(item), + bands_path, + mask_path, + sentinel_product.product_name, + ) + band_idx = {name: idx for idx, name in enumerate(collection.asset_keys)} + # Add band aliases for spyndex + for k, v in SENTINEL2_SPYNDEX.items(): + band_idx[v] = band_idx[k] + bands_raster = Sentinel2Raster.clone_from( + sentinel_product, + bands=band_idx, + id=hashlib.sha256(f"stacked bands {sentinel_product.id}".encode()).hexdigest(), + assets=[AssetVibe(reference=bands_path, type="image/tiff", id=bands_id)], + ) + cloud_raster = Sentinel2CloudMask.clone_from( + sentinel_product, + bands={"cloud": 0}, + categories=CLOUD_CATEGORIES, + id=hashlib.sha256(f"clouds {sentinel_product.id}".encode()).hexdigest(), + assets=[AssetVibe(reference=mask_path, type="image/tiff", id=mask_id)], + ) + return {"raster": bands_raster, "cloud": cloud_raster} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git 
a/ops/download_stack_sentinel2/download_stack_sentinel2.yaml b/ops/download_stack_sentinel2/download_stack_sentinel2.yaml new file mode 100644 index 00000000..5ff52288 --- /dev/null +++ b/ops/download_stack_sentinel2/download_stack_sentinel2.yaml @@ -0,0 +1,32 @@ +name: download_stack_sentinel2 +inputs: + sentinel_product: Sentinel2Product +output: + raster: Sentinel2Raster + cloud: Sentinel2CloudMask +parameters: + api_key: "" + block_size: 2048 + num_workers: 20 + timeout_s: 120 +entrypoint: + file: download_stack_s2.py + callback_builder: CallbackBuilder +description: + short_description: Downloads and preprocesses Sentinel-2 products. + long_description: + The op will read the bands from the Planetary Computer, resample them to 10m resolution, and + stack them into a single 12-band TIFF file. A cloud mask obtained from quality indicators is + also generated at 10m resolution. + inputs: + sentinel_product: Product to be downloaded. + output: + raster: + Downloaded product with an asset that contains 12 Sentinel-2 L2A bands at 10m resolution. + cloud: Cloud mask at 10m resolution. + parameters: + api_key: Planetary Computer API key. + block_size: Size of blocks that are read by each thread. + num_workers: + Number of threads used when reading and resampling data from the Planetary Computer blobs. + timeout_s: Maximum time, in seconds, before a band reading operation times out. 
class CallbackBuilder:
    """Builds the op callback that downloads the USDA global soil-class raster.

    The archive at `url` is saved as `zip_file` inside a private temporary
    directory; `tiff_file` (the raster) and `meta_file` (the class table) are
    then extracted from it.
    """

    def __init__(self, url: str, zip_file: str, tiff_file: str, meta_file: str):
        self.url = url
        self.zip_file = zip_file
        self.tiff_file = tiff_file
        self.meta_file = meta_file
        # Every file produced by the op lives here until the builder is destroyed.
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def download_usda_soils(
            input_item: DataVibe,
        ) -> Dict[str, Optional[CategoricalRaster]]:
            """Download the archive, extract raster + metadata, and wrap them as a raster."""
            workdir = self.tmp_dir.name
            archive_path = os.path.join(workdir, self.zip_file)
            tiff_path = os.path.join(workdir, self.tiff_file)
            meta_path = os.path.join(workdir, self.meta_file)

            download_file(self.url, archive_path)
            with zipfile.ZipFile(archive_path) as archive:
                # Extract the raster first and the class table second.
                for member, target in ((self.tiff_file, tiff_path), (self.meta_file, meta_path)):
                    with open(target, "wb") as out:
                        out.write(archive.read(member))

            raster_asset = AssetVibe(reference=tiff_path, type="image/tiff", id=gen_guid())

            # The output geometry is the bounding box of the raster itself.
            with rasterio.open(tiff_path) as dataset:
                footprint = mapping(shpg.box(*dataset.bounds))

            # Build a "SOIL_ORDER:SUBORDER" label per table row, then invert the
            # mapping so that labels are the keys and the table index the values.
            table = pd.read_table(meta_path, index_col=0)
            labels = table["SOIL_ORDER"] + ":" + table["SUBORDER"]  # type: ignore
            label_to_code = {label: code for code, label in labels.to_dict().items()}

            raster_id = gen_hash_id(
                "usda_soil",
                footprint,
                (datetime(2015, 1, 1), datetime(2015, 12, 31)),  # dummy dates
            )
            soils = CategoricalRaster.clone_from(
                input_item,
                id=raster_id,
                assets=[raster_asset, json_to_asset(label_to_code, workdir)],
                time_range=input_item.time_range,
                geometry=footprint,
                bands={"soil_order:suborder": 0},
                categories=list(label_to_code.keys()),
            )
            return {"downloaded_raster": soils}

        return download_usda_soils

    def __del__(self):
        self.tmp_dir.cleanup()
class CallbackBuilder:
    """Builds the op callback that averages five cloud-probability rasters.

    Output rasters are written to a private temporary directory that is
    removed when the builder is destroyed.
    """

    def __init__(self) -> None:
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def ensemble_cloud_prob(
            cloud1: Sentinel2CloudProbability,
            cloud2: Sentinel2CloudProbability,
            cloud3: Sentinel2CloudProbability,
            cloud4: Sentinel2CloudProbability,
            cloud5: Sentinel2CloudProbability,
        ) -> Dict[str, Sentinel2CloudProbability]:
            """Return the per-pixel mean of the five model outputs.

            The result inherits its metadata from `cloud1` and gets a new id.
            """
            ar = [load_raster(c) for c in (cloud1, cloud2, cloud3, cloud4, cloud5)]
            # Stack the model outputs along "band" and average over that axis.
            ar = xr.concat(ar, dim="band").mean(dim="band")
            asset = save_raster_to_asset(ar, self.tmp_dir.name)
            return {
                "cloud_probability": Sentinel2CloudProbability.clone_from(
                    cloud1, id=gen_guid(), assets=[asset]
                )
            }

        return ensemble_cloud_prob

    def __del__(self):
        # Explicit cleanup, consistent with the other op builders in this repo.
        self.tmp_dir.cleanup()
def calibrate(model: Pipeline, index: xr.DataArray) -> xr.DataArray:
    """
    Calibrate non-masked values, clip to [0, 1] and copy over the geodata from original array

    Masked cells are left untouched. When every cell is masked the model is not
    called at all, because there is nothing to predict and an estimator may
    reject a zero-sample input.

    Args:
        model: Fitted estimator; `predict` receives an (N, 1) array holding the
            N non-masked values and must return N predictions.
        index: Array exposing `to_masked_array()` and `copy(data=...)`.

    Returns:
        A copy of `index` whose data is the calibrated masked array.
    """
    index_masked = index.to_masked_array()
    calibrated_masked = index_masked.copy()
    # getmaskarray always yields a full boolean array, even when the mask is `nomask`.
    valid = ~np.ma.getmaskarray(index_masked)
    if valid.any():
        features = index_masked.compressed()[:, None]
        # Flatten to (N,) whether predict returns (N,) or (N, 1).
        calibrated = np.asarray(model.predict(features)).reshape(-1).clip(0, 1)  # type: ignore
        # compressed() and boolean indexing both walk the array in C order,
        # so predictions land back on the cells they were computed from.
        calibrated_masked.data[valid] = calibrated
    return index.copy(data=calibrated_masked)
model[1].intercept_ = SUPPORTED_INDICES[self.index]["intercept"].copy() # type: ignore + index = load_raster(index_raster, use_geometry=True) + calibrated = calibrate(model, index) + + vis_dict: Dict[str, Any] = { + "bands": [0], + "colormap": get_cmap("viridis"), + "range": (0, 1), + } + calibrated_raster = save_raster_from_ref( + calibrated, output_dir, ref_raster=index_raster + ) + calibrated_raster.assets.append(json_to_asset(vis_dict, output_dir)) + return calibrated_raster + + def calibration_callback_list(indices: List[Raster]) -> Dict[str, List[Raster]]: + return {"estimated_canopy_cover": [calibration_callback(index) for index in indices]} + + return calibration_callback_list + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/estimate_canopy_cover/estimate_canopy_cover.yaml b/ops/estimate_canopy_cover/estimate_canopy_cover.yaml new file mode 100644 index 00000000..e9f8f48b --- /dev/null +++ b/ops/estimate_canopy_cover/estimate_canopy_cover.yaml @@ -0,0 +1,13 @@ +name: estimate_canopy_cover +inputs: + indices: List[Raster] +output: + estimated_canopy_cover: List[Raster] +parameters: + index: ndvi +entrypoint: + file: estimate_canopy.py + callback_builder: CallbackBuilder +description: + short_description: + Applies a linear regressor with pre-computed polynomial features on top of the index raster to estimate canopy cover. 
\ No newline at end of file diff --git a/ops/extract_gedi_rh100/extract_gedi_rh100.py b/ops/extract_gedi_rh100/extract_gedi_rh100.py new file mode 100644 index 00000000..af65d4b8 --- /dev/null +++ b/ops/extract_gedi_rh100/extract_gedi_rh100.py @@ -0,0 +1,104 @@ +import logging +import os +from collections import defaultdict +from tempfile import TemporaryDirectory +from typing import Any, Dict, List, cast + +import geopandas as gpd +import h5py +import numpy as np +from geopandas.array import GeometryArray +from shapely import geometry as shpg +from shapely.geometry.base import BaseGeometry + +from vibe_core.data import AssetVibe, DataVibe, GEDIProduct, GeometryCollection, gen_guid +from vibe_core.data.core_types import BBox + +BEAMS = [ + "BEAM0000", + "BEAM0001", + "BEAM0010", + "BEAM0011", + "BEAM0101", + "BEAM0110", + "BEAM1000", + "BEAM1011", +] +L2B = "GEDI02_B.002" +LOGGER = logging.getLogger(__name__) + + +def extract_dataset(filepath: str, geometry: BaseGeometry, check_quality: bool): + lon_min, lat_min, lon_max, lat_max = cast(BBox, geometry.bounds) + d: Dict[str, List[Any]] = defaultdict(list) + with h5py.File(filepath) as h5: + for b in BEAMS: + lon = cast(h5py.Dataset, h5.get(f"{b}/geolocation/lon_lowestmode"))[()] + lat = cast(h5py.Dataset, h5.get(f"{b}/geolocation/lat_lowestmode"))[()] + bbox_mask = (lon_min <= lon) & (lon <= lon_max) & (lat_min <= lat) & (lat <= lat_max) + if not bbox_mask.any(): + continue + bbox_idx = np.where(bbox_mask)[0] + pts = gpd.points_from_xy(lon[bbox_idx], lat[bbox_idx]) + within = pts.within(geometry) + if not within.any(): + continue + within_idx = np.where(within)[0] + idx = bbox_idx[within_idx] + + if check_quality: + # Filter data by quality flag: 1 = good, 0 = bad + qual = cast(h5py.Dataset, h5.get(f"{b}/l2b_quality_flag"))[idx].astype(bool) + if not qual.any(): + continue + within_idx = within_idx[qual] + idx = idx[qual] + + d["geometry"].extend(cast(GeometryArray, pts[within_idx])) + 
class CallbackBuilder:
    """Builds the op callback that extracts RH100 points from a GEDI L2B product.

    Args:
        check_quality: When true, only shots whose `l2b_quality_flag` is set
            are kept (the filtering itself happens in `extract_dataset`).
    """

    def __init__(self, check_quality: bool):
        self.tmp_dir = TemporaryDirectory()
        self.check_quality = check_quality

    def __call__(self):
        def callback(gedi_product: GEDIProduct, roi: DataVibe) -> Dict[str, GeometryCollection]:
            """Save the shots inside `roi` as a GeoPackage asset.

            Raises:
                ValueError: If the product is not a GEDI L2B product.
            """
            if gedi_product.processing_level != L2B:
                raise ValueError(
                    f"Processing level must be {L2B}, found {gedi_product.processing_level}"
                )
            h5_path = gedi_product.assets[0].local_path
            geom = shpg.shape(roi.geometry)
            asset_guid = gen_guid()
            LOGGER.info(f"Extracting data from hdf5 file {h5_path}")
            df = extract_dataset(h5_path, geom, self.check_quality)
            if df is not None:
                asset_path = os.path.join(self.tmp_dir.name, f"{asset_guid}.gpkg")
                LOGGER.info(f"Saving data to {asset_path}")
                df.to_file(asset_path, driver="GPKG")
                LOGGER.info("All done! Creating GeometryCollection")

                assets = [
                    AssetVibe(
                        reference=asset_path, type="application/geopackage+sqlite3", id=asset_guid
                    )
                ]
            else:
                # Nothing survived the spatial/quality filters: emit an output without assets.
                LOGGER.info(
                    f"No data available in product {gedi_product.product_name} after filtering, "
                    "creating assetless output"
                )
                assets = []
            rh100 = GeometryCollection.clone_from(
                gedi_product, geometry=roi.geometry, id=gen_guid(), assets=assets
            )
            return {"rh100": rh100}

        return callback

    def __del__(self):
        # Explicit cleanup, consistent with the other op builders in this repo.
        self.tmp_dir.cleanup()
@pytest.fixture
def fake_asset(tmp_path: Path):
    """Write a minimal fake GEDI HDF5 file and return its path.

    Every beam gets `NUM_POINTS` shots along the diagonal from (0, 0) to (2, 2).
    The beam id (the binary suffix of the beam name) is stored as the `beam`
    value and added to an rh100 ramp from 0 to 1, and only the first shot of
    each beam has a zero quality flag.
    """
    h5_path = os.path.join(tmp_path.absolute(), "fake.h5")
    with h5py.File(h5_path, "w") as h5:
        for beam_name in BEAMS:
            beam_id = int(beam_name.replace("BEAM", ""), 2)
            quality = np.ones(NUM_POINTS)
            quality[0] = 0
            datasets = {
                "geolocation/lon_lowestmode": np.linspace(0, 2, NUM_POINTS),
                "geolocation/lat_lowestmode": np.linspace(0, 2, NUM_POINTS),
                "beam": beam_id * np.ones(NUM_POINTS),
                "rh100": np.linspace(0, 1, NUM_POINTS) + beam_id,
                "l2b_quality_flag": quality,
            }
            for suffix, values in datasets.items():
                h5.create_dataset(f"{beam_name}/{suffix}", data=values)
    return h5_path
def append_nones(length: int, list_: List[str], pad: str = " 0") -> List[str]:
    """
    Pads `list_` with filler entries until it holds exactly `length` items.

    Despite the function name, the filler is the string `pad` (by default
    `" 0"`), not `None`. The input is never modified; a new list is returned.

    Args:
        length: Number of items the result must contain.
        list_: Items to keep at the front of the result.
        pad: Filler value appended for each missing item.

    Raises:
        AttributeError: If `list_` already holds more than `length` items.
    """
    missing = length - len(list_)
    if missing < 0:
        # AttributeError is kept for backward compatibility with existing callers.
        raise AttributeError("Length error list is too long.")
    return list(list_) + [pad] * missing
def get_catalog_items(raster: Raster, tol: timedelta = timedelta(days=5)) -> List[Item]:
    """
    Get sentinel2 tiles that intersect with the raster geometry
    within a tolerance of the raster datetime

    Only items acquired at the datetime closest to the start of the raster time
    range are kept, and of those only the subset needed to cover the raster
    geometry is returned.

    Args:
        raster: Raster whose geometry and time range drive the query.
        tol: Half-width of the search window around the raster start time.

    Raises:
        ValueError: If the catalog has no item for the region within `tol`.
    """
    geom = shpg.shape(raster.geometry)
    roi = cast(BBox, geom.bounds)
    raster_dt = raster.time_range[0]
    time_range = (raster_dt - tol, raster_dt + tol)
    items = query_catalog(roi, time_range)
    if not items:
        # Fail with an actionable message instead of an opaque empty-sequence
        # error from the date selection below.
        raise ValueError(
            f"No {COLLECTION} items found for bounds {roi} between "
            f"{time_range[0].isoformat()} and {time_range[1].isoformat()}"
        )
    # Filter items by closest date. dict.fromkeys de-duplicates while keeping the
    # catalog order, so ties are resolved deterministically.
    dates = list(dict.fromkeys(cast(datetime, item.datetime) for item in items))
    closest_date = min(dates, key=lambda d: abs(raster_dt - d))
    items = [item for item in items if item.datetime == closest_date]

    # Return items necessary to cover all the spatial extent of the raster
    return filter_necessary_items(geom, items)
def get_xml_data(item: Item, timeout_s: float = 60.0) -> ElementTree:
    """
    Get granule metadata XML from the planetary computer STAC item

    Args:
        item: STAC item carrying a "granule-metadata" asset.
        timeout_s: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the op indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within `timeout_s`.
    """
    href = item.assets["granule-metadata"].href
    signed_href = pc.sign(href)
    response = requests.get(signed_href, timeout=timeout_s)
    # Surface HTTP failures here; otherwise the error page would be handed to
    # the XML parser and fail later with a misleading parse error.
    response.raise_for_status()
    return ET.parse(io.BytesIO(response.content))
def to_georeferenced_array(
    angle_grid: NDArray[Any], center: Tuple[float, float], resolution: Tuple[float, float], crs: str
) -> xr.DataArray:
    """
    Wrap a 2D angle grid into a single-band DataArray with x/y coordinates.

    Coordinates are evenly spaced by `resolution` (x step, y step) and arranged
    symmetrically around `center` (x, y). The CRS is registered through the
    rioxarray accessor.
    """
    n_rows, n_cols = angle_grid.shape
    center_x, center_y = center
    step_x, step_y = resolution

    def axis_coords(mid: float, step: float, count: int) -> NDArray[Any]:
        # `count` samples spaced by `step`, symmetric around `mid`
        return np.linspace(mid - (count - 1) / 2 * step, mid + (count - 1) / 2 * step, count)

    x_coords = axis_coords(center_x, step_x, n_cols)
    y_coords = axis_coords(center_y, step_y, n_rows)

    # Prepend a singleton band axis so that the array is (band, y, x)
    georeferenced = xr.DataArray(angle_grid[None], {"band": [1], "y": y_coords, "x": x_coords})
    georeferenced.rio.set_crs(crs)
    return georeferenced
def get_angles(raster: Raster, tol: timedelta = timedelta(days=5)) -> Angles:
    """
    Fetch view and sun angle grids, according to the raster geometry and time range.
    Time range is assumed to be one value. The closest visit is used in case there is no samples
    for the exact date. In case the geometry spans multiple tiles, the angle grids will be merged.
    Grids are reprojected to native tif CRS and clipped according to the geometry.
    Returns mean view zenith, mean view azimuth, sun zenith, and sun azimuth grids, respectively.

    Args:
        raster: Raster whose geometry, start time and CRS drive the query.
        tol: Half-width of the catalog search window around the raster start time.
    """
    geom = shpg.shape(raster.geometry)
    # get_catalog_items already reduces the result to the tiles needed to cover
    # the raster geometry, so no second filtering pass is required here.
    items = get_catalog_items(raster, tol)
    # Transpose [item][angle kind] into [angle kind][item]
    angles_list = zip(*(get_angles_from_item(item) for item in items))

    raster_crs = get_crs(raster)
    return cast(
        Angles,
        tuple(
            merge_arrays(
                [
                    ang.rio.reproject(raster_crs, resampling=Resampling.bilinear, nodata=np.nan)
                    for ang in angles
                ]
            ).rio.clip([geom], crs="epsg:4326", all_touched=True)
            for angles in angles_list
        ),
    )
def get_noaa_data(time: GfsForecast, output_dir: str, sas_token: str) -> GfsForecast:
    """Get the global forecast for the given input time data

    Args:
        time: GfsForecast containing forecast publish time and desired forecast time
        output_dir: directory in which to save the grib file
        sas_token: token used to access Azure blob storage

    Returns:
        GfsForecast containing global forecast for the specified time

    Raises:
        azure.core.exceptions.ResourceNotFoundError if forecast file cannot be found
    """
    container_client: ContainerClient = ContainerClient.from_container_url(get_sas_uri(sas_token))
    publish_time = datetime.fromisoformat(time.publish_time)
    forecast_time = time.time_range[0]
    # total_seconds() accounts for whole days; `.seconds` only holds the
    # sub-day remainder and would wrap every offset of 24 hours or more.
    forecast_offset = int((forecast_time - publish_time).total_seconds() // 3600)

    blob_url = blob_url_from_offset(publish_time, forecast_offset)
    grib_file = "{date}T{cycle_runtime:02}-f{offset:03}.grib".format(
        date=publish_time.date().isoformat(),
        cycle_runtime=publish_time.hour,
        offset=forecast_offset,
    )

    file_path = os.path.join(output_dir, grib_file)

    # Download before opening the destination so that a missing blob does not
    # leave an empty grib file behind.
    try:
        payload = container_client.download_blob(blob_url).readall()
    except ResourceNotFoundError:
        # the specified forecast date has no publications
        LOGGER.exception("Failed to download blob {}".format(blob_url))
        raise

    with open(file_path, "wb") as blob_file:
        blob_file.write(payload)

    return GfsForecast(
        id=gen_forecast_time_hash_id(
            "GlobalForecast", time.geometry, publish_time, time.time_range
        ),
        time_range=time.time_range,
        geometry=time.geometry,
        assets=[grib_to_asset(file_path)],
        publish_time=time.publish_time,
    )
def datetime_to_query_date(
    user_input: DataVibe, sas_token: str
) -> Tuple[datetime, Tuple[datetime, datetime]]:
    """Get the most relevant model publish time and forecast window for the requested time.

    Args:
        user_input: DataVibe whose ``time_range[0]`` is the day and hour of interest.
        sas_token: Token used to access the Azure blob storage container.

    Returns:
        A tuple ``(published_datetime, (forecast_datetime, forecast_end))`` where
        ``published_datetime`` is the publish time (UTC) of the selected model cycle and the
        inner tuple is the time window covered by the selected forecast.

    Raises:
        RuntimeError: If the search walks back to a forecast offset larger than 384 hours
            without finding a published blob.
    """
    container_client: ContainerClient = ContainerClient.from_container_url(get_sas_uri(sas_token))
    # get the forecast for the beginning of the time range in UTC
    input_utc = user_input.time_range[0].astimezone(timezone.utc)
    now_utc = datetime.now(tz=timezone.utc)

    if input_utc > now_utc:
        # forecast is for a future time; get the latest data
        publish_date = now_utc
    else:
        # forecast is for a past time; fetch old forecasts
        publish_date = input_utc

    # modify time to be one of 00, 06, 12, 18 hours
    time_utc = publish_date.time()
    query_hour = (time_utc.hour // CC_GAP) * CC_GAP

    published_datetime = datetime.combine(
        publish_date.date(), time.min.replace(hour=query_hour), tzinfo=timezone.utc
    )

    # compute the difference between the forecast publish time and the target forecast time
    forecast_offset = int((input_utc - published_datetime).total_seconds() // 3600)

    # Find the most relevant blob
    blob_found = False
    valid_duration = 1
    while not blob_found:
        blob_url = blob_url_from_offset(published_datetime, forecast_offset)
        blob_client = container_client.get_blob_client(blob=blob_url)
        if blob_client.exists():
            blob_found = True
        else:
            # Try the previous cycle runtime
            published_datetime -= timedelta(hours=CC_GAP)
            forecast_offset += CC_GAP
            if forecast_offset > 120 and forecast_offset <= 384:
                valid_duration = 3
                # forecasts this far into the future are made with 3 hour granularity
                forecast_offset -= forecast_offset % 3
            elif forecast_offset > 384:
                # forecasts are not made this far out.
                # No exception is being handled here, so log with error() rather than
                # exception(), and call isoformat() so the timestamp itself is logged.
                LOGGER.error(
                    "Could not find valid forecast for time {}".format(input_utc.isoformat())
                )
                raise RuntimeError("Forecast not found")

    forecast_datetime = published_datetime + timedelta(hours=forecast_offset)
    forecast_end = forecast_datetime + timedelta(hours=valid_duration)
    return published_datetime, (forecast_datetime, forecast_end)
def callback(
    rasters: List[Raster], group_by: List[DataVibe], threshold: float
) -> Dict[str, List[RasterSequence]]:
    """Build one RasterSequence per reference geometry from the rasters that match it.

    A raster matches a reference when `is_approx_equal` accepts both geometries with the
    given threshold. All rasters must share the same bands.

    Raises:
        ValueError: If the rasters do not all have the same bands.
    """
    expected_bands = rasters[0].bands
    if any(raster.bands != expected_bands for raster in rasters):
        raise ValueError("Expected to group rasters with the same bands")

    groups: List[RasterSequence] = []
    for reference in group_by:
        reference_geom = shpg.shape(reference.geometry)
        # Sort members by id so the sequence id does not depend on input order
        members = sorted(
            (
                raster
                for raster in rasters
                if is_approx_equal(
                    shpg.shape(raster.geometry), reference_geom, threshold=threshold
                )
            ),
            key=lambda raster: raster.id,
        )
        start_times = [member.time_range[0] for member in members]
        digest = hashlib.sha256("".join(member.id for member in members).encode()).hexdigest()
        sequence = RasterSequence(
            id=digest,
            time_range=(min(start_times), max(start_times)),
            geometry=reference.geometry,
            assets=[],
            bands=expected_bands,
        )
        for member in members:
            sequence.add_item(member)
        groups.append(sequence)
    return {"raster_groups": groups}
def callback(rasters: List[Raster], criterion: str) -> Dict[str, List[RasterSequence]]:
    """Group rasters into RasterSequences by a property of their start date.

    Args:
        rasters: Rasters to group; the start of each `time_range` is used for grouping.
        criterion: One of "day_of_year", "week", "month", "year" or "month_and_year".

    Returns:
        A dict with key "raster_groups" holding one RasterSequence per distinct key value.

    Raises:
        ValueError: If `criterion` is not one of the supported values.
    """
    key_func = {
        "day_of_year": lambda x: x.time_range[0].timetuple().tm_yday,
        "week": lambda x: x.time_range[0].isocalendar()[1],
        "month": lambda x: x.time_range[0].month,
        "year": lambda x: x.time_range[0].year,
        "month_and_year": lambda x: (x.time_range[0].year, x.time_range[0].month),
    }
    criterion_func = key_func.get(criterion)
    if criterion_func is None:
        raise ValueError(f"Invalid group criterion {criterion}")

    res = []
    # groupby only merges adjacent items, so the input is sorted by the same key first
    for key, group in groupby(sorted(rasters, key=criterion_func), criterion_func):
        group = list(group)
        # Composite keys ("month_and_year") are tuples, not lists; join their parts so the
        # id reads "group_2022_1_..." instead of embedding the tuple repr.
        if isinstance(key, (list, tuple)):
            key = "_".join(str(k) for k in key)

        raster_seq = RasterSequence.clone_from(group[0], f"group_{key}_{gen_guid()}", [])
        for r in group:
            raster_seq.add_item(r)
        res.append(raster_seq)

    return {"raster_groups": res}
@pytest.mark.parametrize("criterion, expected", EXPECTED)
def test_op(criterion: str, expected: int):
    """Run the op on NDAYS consecutive daily rasters and check the number of groups."""
    op_tester = OpTester(YAML_PATH)
    op_tester.update_parameters({"criterion": criterion})

    # Small square footprint centered on latitude/longitude 42
    center, half_side = 42.0, 0.0042
    footprint: Polygon = box(
        center - half_side,
        center - half_side,
        center + half_side,
        center + half_side,
        ccw=True,
    )

    fake_asset = AssetVibe(reference="", type=mimetypes.types_map[".tif"], id="fake_asset")

    # One raster per day, starting at START_DATE
    rasters = []
    for day_offset in range(NDAYS):
        acquisition = START_DATE + timedelta(day_offset)
        rasters.append(
            Raster(
                id=gen_guid(),
                time_range=(acquisition, acquisition),
                geometry=mapping(footprint),
                assets=[fake_asset],
                bands={},
            )
        )

    output = op_tester.run(rasters=cast(List[BaseVibe], rasters))
    groups = cast(List[RasterSequence], output["raster_groups"])
    assert len(groups) == expected
def callback_builder():
    """Return the op callback that groups Sentinel-1 rasters by orbit number and tile id."""

    def group_by_orbit(
        rasters: List[Sentinel1Raster],
    ) -> Dict[str, List[Sentinel1RasterOrbitGroup]]:
        # Bucket rasters by (orbit_number, tile_id), keeping first-seen order of the keys
        by_orbit: Dict[Tuple[int, str], List[Sentinel1Raster]] = {}
        for raster in rasters:
            by_orbit.setdefault((raster.orbit_number, raster.tile_id), []).append(raster)

        return {"raster_groups": [make_orbit_group(members) for members in by_orbit.values()]}

    return group_by_orbit
file: group_sentinel1_orbits.py + callback_builder: callback_builder +description: + short_description: + Groups raster files representing the same tile and moment in time that might + have been partially generated and split due to the movement of Sentinel-1 through base stations. \ No newline at end of file diff --git a/ops/group_sentinel2_orbits/group_sentinel2_orbits.py b/ops/group_sentinel2_orbits/group_sentinel2_orbits.py new file mode 100644 index 00000000..1275c86f --- /dev/null +++ b/ops/group_sentinel2_orbits/group_sentinel2_orbits.py @@ -0,0 +1,66 @@ +import hashlib +from collections import defaultdict +from typing import Dict, List, Tuple, Union, cast + +from shapely import geometry as shpg +from shapely.ops import unary_union + +from vibe_core.data.sentinel import ( + Sentinel2CloudMask, + Sentinel2CloudMaskOrbitGroup, + Sentinel2Raster, + Sentinel2RasterOrbitGroup, + discriminator_date, +) +from vibe_lib.spaceeye.utils import find_s2_product + +TileData = List[Tuple[Sentinel2Raster, Sentinel2CloudMask]] + + +def make_orbit_group( + items: TileData, +) -> Tuple[Sentinel2RasterOrbitGroup, Sentinel2CloudMaskOrbitGroup]: + # Make sure we are ordered by time make things consistent for the id hash + rasters, masks = zip(*sorted(items, key=lambda x: discriminator_date(x[0].product_name))) + rasters = cast(List[Sentinel2Raster], list(rasters)) + masks = cast(List[Sentinel2CloudMask], list(masks)) + # Id depends on all component ids + raster_group_id, cloud_group_id = [ + hashlib.sha256("".join(i.id for i in items).encode()).hexdigest() + for items in (rasters, masks) + ] + geom = shpg.mapping(unary_union([shpg.shape(r.geometry) for r in rasters])) + # dates = [r.time_range[0] for r in rasters] + # time_range = (min(dates), max(dates)) + raster_group = Sentinel2RasterOrbitGroup.clone_from( + rasters[-1], id=raster_group_id, assets=[], geometry=geom + ) + for r in rasters: + raster_group.add_raster(r) + mask_group = Sentinel2CloudMaskOrbitGroup.clone_from( + 
def callback_builder():
    """Return the op callback that groups Sentinel-2 rasters and cloud masks by orbit.

    The callback pairs each raster with its cloud mask (looked up with `find_s2_product`
    by product name), buckets the pairs by (orbit_number, tile_id) and builds one raster
    group and one mask group per bucket. Empty input yields two empty lists.
    """

    def group_by_orbit(
        rasters: List[Sentinel2Raster],
        masks: List[Sentinel2CloudMask],
    ) -> Dict[str, Union[List[Sentinel2RasterOrbitGroup], List[Sentinel2CloudMaskOrbitGroup]]]:
        same_orbits: Dict[Tuple[int, str], TileData] = defaultdict(list)
        for item in rasters:
            orbit_key = (item.orbit_number, item.tile_id)
            mask_item = find_s2_product(item.product_name, masks)
            same_orbits[orbit_key].append((item, mask_item))

        # Collect both outputs in one pass instead of `zip(*groups)`, which cannot be
        # unpacked into two names when there are no groups at all.
        raster_groups: List[Sentinel2RasterOrbitGroup] = []
        mask_groups: List[Sentinel2CloudMaskOrbitGroup] = []
        for tile_data in same_orbits.values():
            raster_group, mask_group = make_orbit_group(tile_data)
            raster_groups.append(raster_group)
            mask_groups.append(mask_group)

        return {"raster_groups": raster_groups, "mask_groups": mask_groups}

    return group_by_orbit
\ No newline at end of file diff --git a/ops/group_tile_sequence/group_s1_tile_sequence.yaml b/ops/group_tile_sequence/group_s1_tile_sequence.yaml new file mode 100644 index 00000000..75f08dc6 --- /dev/null +++ b/ops/group_tile_sequence/group_s1_tile_sequence.yaml @@ -0,0 +1,19 @@ +name: group_s1_tile_sequence +inputs: + rasters: List[Sentinel1Raster] + input_data: List[DataVibe] +output: + tile_sequences: List[Sentinel1RasterTileSequence] +parameters: + tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml + duration: 48 + overlap: 0.5 +entrypoint: + file: group_tile_sequence.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - duration + - overlap +description: + short_description: Groups Sentinel-1 tiles into time windows of defined duration. \ No newline at end of file diff --git a/ops/group_tile_sequence/group_s2_tile_sequence.yaml b/ops/group_tile_sequence/group_s2_tile_sequence.yaml new file mode 100644 index 00000000..decf7eeb --- /dev/null +++ b/ops/group_tile_sequence/group_s2_tile_sequence.yaml @@ -0,0 +1,19 @@ +name: group_s2_tile_sequence +inputs: + rasters: List[Sentinel2Raster] + input_data: List[DataVibe] +output: + tile_sequences: List[Sentinel2RasterTileSequence] +parameters: + tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml + duration: 48 + overlap: 0.5 +entrypoint: + file: group_tile_sequence.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - duration + - overlap +description: + short_description: Groups Sentinel-2 tiles into time windows of defined duration. 
def gen_sequence_id(
    items: ListTileData,
    geom: BaseGeometry,
    read_time_range: TimeRange,
    write_time_range: TimeRange,
):
    """Return the SHA-256 hex digest identifying a tile sequence.

    The digest covers, in order: the id of every item, the WKT of the geometry, and the
    ISO-formatted bounds of the read and write time ranges.
    """
    parts = [item.id for item in items]
    parts.append(geom.wkt)
    for time_range in (read_time_range, write_time_range):
        parts.extend(bound.isoformat() for bound in time_range)
    return hashlib.sha256("".join(parts).encode()).hexdigest()
def make_tile_sequence(
    items: ListTileData,
    seq_geom: BaseGeometry,
    read_time_range: TimeRange,
    write_time_range: TimeRange,
    ref_item: TileData,
) -> TileSequenceData:
    """Create a TileSequenceData from the list of rasters and a sequence geometry.

    The sequence class is looked up in `Tile2Sequence` from the type of `ref_item`, and the
    new sequence is cloned from `ref_item` with the per-product fields reset.
    """
    sequence_cls = Tile2Sequence[type(ref_item)]
    # Order by start time so the id hash is consistent regardless of input order
    ordered = sorted(items, key=lambda item: item.time_range[0])

    overrides = dict(
        id=gen_sequence_id(ordered, seq_geom, read_time_range, write_time_range),
        assets=[],
        geometry=shpg.mapping(seq_geom),
        time_range=read_time_range,
        write_time_range=write_time_range,
        product_name="",
        orbit_number=-1,
        relative_orbit_number=-1,
        orbit_direction="",
        platform="",
    )
    sequence = sequence_cls.clone_from(ref_item, **overrides)

    for item in ordered:
        sequence.add_item(item)

    return sequence
class CallbackBuilder:
    """Build the op callback that groups tile rasters into time-windowed sequences.

    Args:
        tile_geometry: Path of the file with the tile geometries, read with geopandas.
        duration: Window length forwarded to `make_chip_sequences`; must be larger than 0.
        overlap: Fraction of `duration` used as the step between consecutive windows
            (``step = int(duration * overlap)``); must be in the range (0, 1].

    Raises:
        ValueError: If `duration` or `overlap` is outside its valid range.
    """

    def __init__(self, tile_geometry: str, duration: int, overlap: float):
        self.tile_geometry = tile_geometry
        self.duration = duration
        if duration <= 0:
            raise ValueError(f"Duration must be larger than 0, found {duration}")
        if overlap <= 0 or overlap > 1:
            # The message states the range this check actually enforces: 0 is rejected
            # (it would give a zero step) and 1 is accepted.
            raise ValueError(f"Overlap value must be in range (0, 1], found {overlap}")
        self.overlap = overlap

    def __call__(self):
        def group_by_tile_geom(
            rasters: ListTileData, input_data: List[DataVibe]
        ) -> Dict[str, List[TileSequenceData]]:
            # List the tiles for which we have products
            tile_ids = set(p.tile_id for p in rasters)

            # Read tile geometry and filter for those that we have products
            # Make fiona read the file: https://gis.stackexchange.com/questions/114066/
            for driver in KML_DRIVER_NAMES:
                fiona.drvsupport.supported_drivers[driver] = "rw"  # type: ignore
            tile_dfs = gpd.read_file(self.tile_geometry)
            # Filter only tiles for which we have products
            tile_dfs = cast(
                gpd.GeoDataFrame,
                tile_dfs[tile_dfs["Name"].isin(tile_ids)],  # type: ignore
            )

            # Group rasters by tile_id and geometry
            sequences, sequences_geom, sequences_time_range = group_rasters(
                rasters, input_data, tile_dfs
            )

            # Create TileSequenceData for each group
            step = int(self.duration * self.overlap)
            grouped_sequences = [
                group
                for k in sequences.keys()
                for group in make_chip_sequences(
                    sequences[k],
                    sequences_geom[k],
                    sequences_time_range[k],
                    self.duration,
                    step,
                )
            ]

            return {"tile_sequences": grouped_sequences}

        return group_by_tile_geom
def create_heatmap(self, raster: Raster, samples: GeometryCollection) -> DataVibe:
    """Train a classifier on the samples and return the predicted heatmap as a DataVibe.

    The samples file is reduced to its geometry and `self.attribute_name` columns, a model
    is trained against the raster, and the prediction clipped to the samples' geometry is
    attached as the single asset of the result.
    """
    raster_url = raster.raster_asset.url

    # Read the samples and keep only the geometry and the attribute of interest
    all_samples = cast(gpd.GeoDataFrame, gpd.read_file(samples.assets[0].url))
    samples_df = cast(GeoDataFrame, all_samples[["geometry", self.attribute_name]])
    assert samples_df.crs, "samples dataframe has no CRS"

    # Train model
    model, le, scaler = self.train_classifier(
        raster_path=raster_url,
        samples=cast(GeoDataFrame, samples_df),
    )

    # Predict over the farm boundary
    heatmap_asset = self.predict_classifier(
        model=model,
        raster_path=raster_url,
        label_encoder=le,
        scaler=scaler,
        farm_boundary=samples.geometry,
        samples_crs=samples_df.crs,
    )

    heatmap_id = gen_hash_id("heatmap_nutrients", raster.geometry, raster.time_range)
    return DataVibe(heatmap_id, raster.time_range, raster.geometry, [heatmap_asset])
def train_classifier(
    self,
    raster_path: str,
    samples: GeoDataFrame,
):
    """Train a random forest on raster values around each sample.

    Args:
        raster_path: Path or URL of the raster opened with rasterio.
        samples: Sample geometries with the `self.attribute_name` column.

    Returns:
        A tuple ``(model, label_encoder, scaler)`` with the fitted classifier and the label
        encoder and scaler returned by `get_train_data`.
    """
    # Open the raster in a context manager so the dataset is closed once the training
    # data has been extracted.
    with rasterio.open(raster_path, "r") as raster_obj:
        # create grid from sample distance
        samples = cast(GeoDataFrame, samples.to_crs(raster_obj.crs))  # type: ignore
        samples["geometry"] = cast(GeoSeries, samples["geometry"]).buffer(
            self.buffer, cap_style=3
        )

        x, y, le, scaler = self.get_train_data(samples=samples, raster=raster_obj)

    # train model on 80% of the data; the held-out split is discarded
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2)
    model = RandomForestClassifier(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        random_state=self.random_state,
    )
    model.fit(x_train, y_train)
    return model, le, scaler
class CallbackBuilder:
    """Build the op callback that turns soil samples into a nutrient heatmap.

    The callback interpolates the sampled attribute over a mesh grid covering
    the sample geometry, rasterizes the interpolated values on the grid of the
    input raster, groups them into ``bins`` levels and exports the result as a
    shapefile asset and a GeoTIFF asset.

    Args:
        attribute_name: Column of the samples file holding the value to map.
        simplify: Simplification mode forwarded to ``write_shapefile``.
        tolerance: Simplification tolerance forwarded to ``write_shapefile``.
        algorithm: One of ``"cluster overlap"``, ``"nearest neighbor"`` or
            ``"kriging neighbor"``.
        resolution: Mesh-grid spacing; also used as the buffer distance applied
            to every interpolated point before rasterization.
        bins: Number of histogram bins used to group the heatmap values.
    """

    def __init__(
        self,
        attribute_name: str,
        simplify: str,
        tolerance: float,
        algorithm: str,
        resolution: int,
        bins: int,
    ):
        # Scratch space for the generated assets; released in __del__.
        self.temp_shapefile_dir = TemporaryDirectory()
        self.temp_tiff_dir = TemporaryDirectory()
        self.attribute_name = attribute_name
        self.simplify = simplify
        self.tolerance = tolerance
        self.algorithm = algorithm
        self.resolution = resolution
        self.bins = bins

    def create_heatmap(
        self,
        raster: Raster,
        samples: GeometryCollection,
        samples_boundary: GeometryCollection,
    ) -> DataVibe:
        """Run the configured algorithm and wrap the generated assets in a DataVibe.

        Side effect: stores the CRS of ``raster`` in ``self.raster_crs`` for the
        later rasterization step.
        """
        with rasterio.open(raster.assets[0].path_or_url) as src:
            self.raster_crs = src.crs
        # Get reduced samples
        samples_df = gpd.read_file(samples.assets[0].url)
        samples_df = cast(GeoDataFrame, samples_df[["geometry", self.attribute_name]])
        # Get reduced sample boundaries (clusters)
        samples_boundary_df = cast(
            GeoDataFrame,
            gpd.read_file(samples_boundary.assets[0].url),
        )
        samples_boundary_df = cast(GeoDataFrame, samples_boundary_df[["geometry"]])
        boundary = cast(Polygon, shape(samples.geometry))
        # Get mesh grid geo locations for farm boundary
        geo_locations = create_mesh_grid(boundary, self.resolution, self.raster_crs)
        # Run nutrient algorithm and create heatmap
        farm_boundary_df = GeoDataFrame(geometry=[boundary], crs=4326)  # type: ignore
        nutrients_df = self.run_algorithm(samples_df, samples_boundary_df, geo_locations)
        assetVibe = self.generate_samples_heat_map(
            nutrients_df, raster.assets[0].url, farm_boundary_df
        )
        return DataVibe(
            gen_hash_id(
                f"heatmap_nutrients_{self.attribute_name}",
                raster.geometry,
                raster.time_range,
            ),
            raster.time_range,
            raster.geometry,
            assetVibe,
        )

    def run_algorithm(
        self,
        samples_df: GeoDataFrame,
        samples_boundary_df: GeoDataFrame,
        geo_locations: GeoDataFrame,
    ) -> GeoDataFrame:
        """Dispatch to the interpolation routine selected by ``self.algorithm``.

        Raises:
            RuntimeError: If ``self.algorithm`` is not one of the supported names.
        """
        if self.algorithm == "cluster overlap":
            return run_cluster_overlap(
                attribute_name=self.attribute_name,
                reduced_samples=samples_df,
                minimum_sample_polygons=samples_boundary_df,
                geo_locations=geo_locations,
            )
        elif self.algorithm == "nearest neighbor":
            return run_nearest_neighbor(
                attribute_name=self.attribute_name,
                reduced_samples=samples_df,
                geo_locations=geo_locations,
            )
        elif self.algorithm == "kriging neighbor":
            return run_kriging_model(
                attribute_name=self.attribute_name,
                reduced_samples=samples_df,
                geo_locations=geo_locations,
            )
        else:
            raise RuntimeError(f"Unknown algorithm: {self.algorithm}")

    def rasterize_heatmap(
        self,
        shapes: Tuple[Any, ...],
        ar: NDArray[Any],
        tr: Affine,
        raster_mask: NDArray[Any],
    ):
        """Burn ``(geometry, value)`` pairs into a raster and write it as a GeoTIFF.

        Args:
            shapes: ``(geometry, value)`` pairs to rasterize.
            ar: Masked source raster; its first band supplies the output shape.
            tr: Affine transform of ``ar``.
            raster_mask: Mask of the cells to keep in the output.

        Returns:
            Tuple ``(array, asset)`` with the grouped, masked heatmap and the
            asset returned by ``write_to_raster``.
        """
        # Rasterize the nutrient boundaries
        raster_output = features.rasterize(
            shapes=shapes,
            out_shape=ar[0].shape,
            transform=tr,
            all_touched=True,
            fill=-1,  # background value
            merge_alg=MergeAlg.replace,
            dtype=rasterio.float32,
        )
        # Cells where the source raster is zero across all bands are zeroed out
        raster_output[ar.sum(axis=0) == 0] = 0
        out_path = os.path.join(self.temp_tiff_dir.name, "raster_output.tif")
        raster_output = self.group_to_nearest(raster_output, raster_mask)
        out = raster_output * raster_mask.astype(np.uint16)
        asset_vibe = write_to_raster(out, tr, out_path, self.raster_crs)
        return out, asset_vibe

    def group_to_nearest(self, raster_output: NDArray[Any], raster_mask: NDArray[Any]):
        """Quantize the heatmap into ``self.bins`` levels.

        Non-positive cells are first replaced (in place, on ``raster_output``)
        by the mean of the positive cells. Histogram edges are then computed
        over the masked cells, and each cell is set to the mean of all cells
        that fall in the same bin.

        Returns:
            A new float array with the same shape as ``raster_output``.
        """
        raster_output[raster_output <= 0] = raster_output[raster_output > 0].mean()

        intervals = np.histogram(raster_output[raster_mask], bins=self.bins)[1]
        # Lower the first edge so that the minimum value lands in bin 0
        intervals[0] = -1
        index = np.searchsorted(intervals, raster_output) - 1
        out_grouped_raster = np.zeros(raster_output.shape)

        for i in range(len(intervals)):
            selected = index == i
            # Skip empty bins: the mean of an empty selection is NaN and only
            # produces a RuntimeWarning without changing any cell.
            if selected.any():
                out_grouped_raster[selected] = raster_output[selected].mean()

        return out_grouped_raster

    def generate_samples_heat_map(
        self,
        nutrients_df: GeoDataFrame,
        src_image_path: str,
        farm_boundary_df: GeoDataFrame,
    ) -> List[AssetVibe]:
        """Create the shapefile and raster assets for the interpolated nutrients.

        Raises:
            RuntimeError: If the farm boundary is empty or the algorithm
                produced no nutrient locations.
        """
        with rasterio.open(src_image_path, "r") as o_raster:
            # change spatial projection of inputs matching to sentinel image
            nutrients_df = cast(GeoDataFrame, nutrients_df.to_crs(o_raster.crs))
            farm_boundary_df = cast(GeoDataFrame, farm_boundary_df.to_crs(o_raster.crs))

            if farm_boundary_df.empty:
                raise RuntimeError("No farm boundary found")

            # create mask for farm boundary
            boundary = farm_boundary_df[:1].geometry[0]  # type: ignore
            ar, tr = mask(o_raster, [boundary], crop=True, nodata=0)
            mask1 = (ar != 0).any(axis=0)

            # Check before touching the frame so that an empty result reports
            # the intended error instead of failing inside buffer/apply.
            if nutrients_df.empty:
                raise RuntimeError("Model didn't identified nutrient locations")

            # collect shapes for rasterization
            nutrients_df["geometry"] = nutrients_df.buffer(self.resolution, cap_style=3)
            nutrients_df["shapes"] = nutrients_df.apply(
                lambda row: (row.geometry, row[self.attribute_name]), axis=1
            )
            shapes = tuple(nutrients_df["shapes"].values)  # type: ignore

            # rasterize shapes
            out, raster_vibe = self.rasterize_heatmap(shapes, ar, tr, mask1)
            shape_vibe = self.export_to_shapeFile(out, o_raster.crs, tr, mask1)

            return [shape_vibe, raster_vibe]

    def export_to_shapeFile(
        self,
        data: NDArray[Any],
        crs: CRS,
        tr: Affine,
        mask1: NDArray[Any],
    ):
        """Write the grouped heatmap as a shapefile and return the resulting asset."""
        asset = write_shapefile(
            data,
            crs,
            tr,
            mask1,
            self.temp_shapefile_dir.name,
            self.simplify,
            self.tolerance,
            "cluster",
        )
        return asset

    def __call__(self):
        def create_heatmap_init(
            raster: Raster,
            samples: GeometryCollection,
            samples_boundary: GeometryCollection,
        ) -> Dict[str, DataVibe]:
            out_vibe = self.create_heatmap(raster, samples, samples_boundary)
            return {"result": out_vibe}

        return create_heatmap_init

    def __del__(self):
        # The attributes may be missing if __init__ failed part-way through.
        for attr in ("temp_shapefile_dir", "temp_tiff_dir"):
            tmp_dir = getattr(self, attr, None)
            if tmp_dir is not None:
                tmp_dir.cleanup()
FAKE_TIME_RANGE = (datetime(2022, 6, 30), datetime(2022, 7, 2))
CONFIG_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "soil_sample_heatmap_using_neighbors.yaml",
)
FARM_BOUNDARY_PATH = "op_resources/heatmap_sensor/long_block_boundary_4326.geojson"


def _wait_for_run(run: Any, poll_interval: float = 5.0) -> Any:
    """Block until ``run`` leaves the "running"/"pending" states and return it.

    Sleeps between status checks instead of spinning on the CPU.
    NOTE(review): assumes ``run.status`` is refreshed on every access, as the
    original polling loops did - confirm against the client.
    """
    while run.status == "running" or run.status == "pending":
        time.sleep(poll_interval)
    return run


def _ingest_geometry(
    vibe_client: FarmvibesAiClient,
    geometry: Dict[str, Any],
    error_message: str,
) -> GeometryCollection:
    """Run the geometry-ingestion workflow and return its first output.

    Raises:
        RuntimeError: With ``error_message`` if the run does not finish as "done".
    """
    geojson_url = ""
    url_hash = str(hash(geojson_url))
    now = datetime.now()

    inputs = ExternalReferenceList(
        id=url_hash,
        time_range=(now, now),
        geometry=geometry,
        assets=[],
        urls=[geojson_url],
    )
    run = vibe_client.run(
        workflow="data_ingestion/user_data/ingest_geometry",
        name="geometry_example",
        input_data=inputs,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: GeometryCollection = run.output["geometry"][0]  # type: ignore
        return obj

    raise RuntimeError(error_message)


@pytest.fixture
def vibe_client():
    """Client connected to the default FarmVibes.AI cluster."""
    return get_default_vibe_client()


@pytest.fixture
def vibe_geometry_dict() -> Dict[str, Any]:
    """Farm boundary as a GeoJSON-like mapping."""
    data_frame = gpd.read_file(FARM_BOUNDARY_PATH)
    geometry = shpg.mapping(data_frame["geometry"][0])  # type: ignore
    return geometry


@pytest.fixture
def vibe_geometry_shapely() -> Union[MultiPolygon, Polygon]:
    """Farm boundary as a shapely geometry."""
    data_frame = gpd.read_file(FARM_BOUNDARY_PATH)
    if not data_frame.empty:
        geometry = data_frame["geometry"][0]  # type: ignore
        return cast(MultiPolygon, geometry)

    raise RuntimeError("Geometry is None")


@pytest.fixture
def download_sentinel_cluster(
    vibe_client: FarmvibesAiClient, vibe_geometry_shapely: Union[MultiPolygon, Polygon]
) -> Raster:
    """Pre-processed Sentinel-2 raster for the farm boundary."""
    run = vibe_client.run(
        workflow="data_ingestion/sentinel2/preprocess_s2",
        name="sentinel2_example",
        geometry=vibe_geometry_shapely,
        time_range=(datetime(2022, 6, 30), datetime(2022, 7, 2)),
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: Raster = run.output["raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")


@pytest.fixture
def download_index_cluster(
    vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str
) -> Raster:
    """Spectral-index raster computed from the Sentinel-2 raster."""
    parameters = {"index": index}

    run = vibe_client.run(
        workflow="data_processing/index/index",
        name="EVI_example",
        input_data=download_sentinel_cluster,
        parameters=parameters,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: Raster = run.output["index_raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")


@pytest.fixture
def download_samples_cluster(
    vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any]
) -> GeometryCollection:
    """Soil samples ingested through the cluster."""
    return _ingest_geometry(
        vibe_client, vibe_geometry_dict, "Download samples cluster request failed - "
    )


@pytest.fixture
def download_samples_boundary(
    vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any]
) -> GeometryCollection:
    """Sample cluster boundaries ingested through the cluster."""
    return _ingest_geometry(
        vibe_client, vibe_geometry_dict, "Download samples boundary request failed - "
    )


@pytest.fixture
def data_vibe(vibe_geometry_dict: Dict[str, Any]):
    vibe_id = str(hash("test_minimums_samples_heatmap"))
    return DataVibe(vibe_id, FAKE_TIME_RANGE, vibe_geometry_dict, [])


@pytest.mark.skip(reason="Dependent on the cluster")
def test_heatmap_c(
    download_sentinel_cluster: Raster,
    download_samples_cluster: GeometryCollection,
    download_samples_boundary: GeometryCollection,
):
    op_ = OpTester(CONFIG_PATH)
    parameters = {"attribute_name": "C", "simplify": "simplify", "tolerance": 1.0}
    op_.update_parameters(parameters)
    output_data = op_.run(
        raster=download_sentinel_cluster,
        samples=download_samples_cluster,
        samples_boundary=download_samples_boundary,
    )

    # Get op result
    assert "result" in output_data


@pytest.fixture
def prescriptions(vibe_client: FarmvibesAiClient):
    """Prescriptions fetched from ADMAg for a fixed party and seasonal field."""
    parameters = {
        "base_url": "base_url",
        "client_id": "client_id",
        "client_secret": "client_secret",
        "authority": "authority",
        "default_scope": "default_scope",
    }
    sample_inputs = ADMAgSeasonalFieldInput(
        party_id="a460c833-7b96-4905-92ed-f19800b87185",
        seasonal_field_id="7db1a756-b898-4ecb-8608-bc2476f242a9",
    )
    inputs = {"admag_input": sample_inputs}
    run = vibe_client.run(
        workflow="data_ingestion/admag/prescriptions",
        name="prescriptions_example",
        input_data=inputs,  # type: ignore
        parameters=parameters,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj = cast(GeometryCollection, run.output["response"][0])  # type: ignore
        return obj
    raise RuntimeError("Fetch prescriptions failed - ")
def _wait_for_run(run: Any, poll_interval: float = 5.0) -> Any:
    """Block until ``run`` leaves the "running"/"pending" states and return it.

    The original loops spun on ``continue`` without sleeping; this polls at a
    fixed interval instead.
    NOTE(review): assumes ``run.status`` is refreshed on every access - confirm.
    """
    import time  # local import: this module does not import ``time`` at the top

    while run.status == "running" or run.status == "pending":
        time.sleep(poll_interval)
    return run


def _classification_parameters(
    attribute_name: str, buffer: int, data_scale: bool
) -> Dict[str, Any]:
    """Op parameters shared by all classification heatmap tests."""
    return {
        "attribute_name": attribute_name,
        "buffer": buffer,
        "bins": 4,
        "simplify": "simplify",
        "tolerance": 1.0,
        "data_scale": data_scale,
        "max_depth": 50,
        "n_estimators": 25,
        "random_state": 100,
    }


def _run_heatmap_op(parameters: Dict[str, Any], raster: Raster, samples: GeometryCollection):
    """Run the op under test with ``parameters`` and return its outputs."""
    op_ = OpTester(CONFIG_PATH)
    op_.update_parameters(parameters)
    return op_.run(raster=raster, samples=samples)


@pytest.fixture
def download_sentinel_cluster(
    vibe_client: FarmvibesAiClient, vibe_geometry_shapely: Union[MultiPolygon, Polygon]
) -> Raster:
    """Pre-processed Sentinel-2 raster for the farm boundary."""
    run = vibe_client.run(
        workflow="data_ingestion/sentinel2/preprocess_s2",
        name="sentinel2_example",
        geometry=vibe_geometry_shapely,
        time_range=(datetime(2022, 6, 30), datetime(2022, 7, 2)),
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: Raster = run.output["raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")


@pytest.fixture
def download_index_cluster(
    vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str
) -> Raster:
    """Spectral-index raster computed from the Sentinel-2 raster."""
    parameters = {"index": index}

    run = vibe_client.run(
        workflow="data_processing/index/index",
        name="EVI_example",
        input_data=download_sentinel_cluster,
        parameters=parameters,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: Raster = run.output["index_raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")


@pytest.fixture
def download_samples_cluster(
    vibe_client: FarmvibesAiClient, vibe_geometry_dict: Dict[str, Any]
) -> GeometryCollection:
    """Soil samples ingested through the cluster."""
    geojson_url = ""
    url_hash = str(hash(geojson_url))
    now = datetime.now()

    inputs = ExternalReferenceList(
        id=url_hash,
        time_range=(now, now),
        geometry=vibe_geometry_dict,
        assets=[],
        urls=[geojson_url],
    )
    run = vibe_client.run(
        workflow="data_ingestion/user_data/ingest_geometry",
        name="geometry_example",
        input_data=inputs,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj: GeometryCollection = run.output["geometry"][0]  # type: ignore
        return obj

    raise RuntimeError("Download samples request failed - ")


@pytest.mark.skip(reason="Dependent on the cluster")
@pytest.mark.parametrize("index", ["evi"])
def test_heatmap_c(download_index_cluster: Raster, download_samples_cluster: GeometryCollection):
    parameters = _classification_parameters("C", buffer=3, data_scale=False)
    output_data = _run_heatmap_op(parameters, download_index_cluster, download_samples_cluster)

    # Get op result
    assert "result" in output_data


@pytest.mark.skip(reason="Dependent on the cluster")
@pytest.mark.parametrize("index", ["evi"])
def test_heatmap_n(download_index_cluster: Raster, download_samples_cluster: GeometryCollection):
    parameters = _classification_parameters("N", buffer=10, data_scale=True)
    output_data = _run_heatmap_op(parameters, download_index_cluster, download_samples_cluster)

    # Get op result
    assert "result" in output_data


@pytest.mark.skip(reason="Dependent on the cluster")
@pytest.mark.parametrize("index", ["pri"])
def test_heatmap_ph(download_index_cluster: Raster, download_samples_cluster: GeometryCollection):
    parameters = _classification_parameters("pH", buffer=10, data_scale=False)
    output_data = _run_heatmap_op(parameters, download_index_cluster, download_samples_cluster)

    # Get op result
    assert "result" in output_data


@pytest.mark.skip(reason="Dependent on the cluster")
@pytest.mark.parametrize("index", ["evi"])
def test_heatmap_p(download_index_cluster: Raster, download_samples_cluster: GeometryCollection):
    parameters = _classification_parameters("P", buffer=3, data_scale=True)
    output_data = _run_heatmap_op(parameters, download_index_cluster, download_samples_cluster)

    # Get op result
    assert "result" in output_data


@pytest.fixture
def prescriptions(vibe_client: FarmvibesAiClient):
    """Prescriptions fetched from ADMAg for a fixed party and seasonal field."""
    parameters = {
        "base_url": "base_url",
        "client_id": "client_id",
        "client_secret": "client_secret",
        "authority": "authority",
        "default_scope": "default_scope",
    }

    sample_inputs = ADMAgSeasonalFieldInput(
        party_id="a460c833-7b96-4905-92ed-f19800b87185",
        seasonal_field_id="7db1a756-b898-4ecb-8608-bc2476f242a9",
    )

    inputs = {"admag_input": sample_inputs}

    run = vibe_client.run(
        workflow="data_ingestion/admag/prescriptions",
        name="prescriptions_example",
        input_data=inputs,  # type: ignore
        parameters=parameters,
    )

    _wait_for_run(run)
    if run.status == "done":
        obj = cast(GeometryCollection, run.output["response"][0])  # type: ignore
        return obj

    raise RuntimeError("Fetch prescriptions failed - ")


@pytest.mark.skip(reason="Dependent on the cluster")
@pytest.mark.parametrize("index", ["evi"])
def test_heatmap_p_admag(download_index_cluster: Raster, prescriptions: GeometryCollection):
    parameters = _classification_parameters("P", buffer=3, data_scale=True)
    output_data = _run_heatmap_op(parameters, download_index_cluster, prescriptions)

    # Get op result
    assert "result" in output_data
FONT_PATHS = [
    "DejaVuSans.ttf",
    "/opt/conda/fonts/DejaVuSans.ttf",
    "/usr/share/fonts/TTF/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/dejavu-sans-fonts/DejaVuSans.ttf",
]


def load_default_font():
    """Install the first loadable font from ``FONT_PATHS`` as the ImageDraw default.

    Raises:
        ValueError: If none of the candidate paths can be loaded.
    """
    for font in FONT_PATHS:
        try:
            ImageDraw.ImageDraw.font = ImageFont.truetype(font, 14)  # type: ignore
        except OSError:
            # Font not available at this location, try the next candidate
            continue
        return
    # We failed to load the font, raise an error
    raise ValueError("Failed to load font for helloworld op")


def get_geoms(g: Union[shpg.Polygon, shpg.MultiPolygon]) -> List[shpg.Polygon]:
    """
    Map MultiPolygons and Polygons into list of Polygons
    """
    if isinstance(g, shpg.MultiPolygon):
        return list(g.geoms)
    return [g]


class CallbackBuilder:
    """Build the helloworld op callback.

    The callback draws a world map of size ``width`` x ``height`` in which
    country polygons intersecting the input geometry get a different color
    from the remaining ones, writes ``msg`` on top and stores the picture as a
    3-band GeoTIFF.
    """

    msg = "HELLO WORLD"

    def __init__(self, width: int, height: int) -> None:
        self.width = width
        self.height = height
        self.tmp_dir = TemporaryDirectory()
        load_default_font()

    def __call__(self):
        def hello(user_input: DataVibe) -> Dict[str, Raster]:
            geom = shpg.shape(user_input.geometry)
            df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))  # type: ignore
            # Find out which geometries intersect with the input geometry
            # Some countries have several polygons, let's split MultiPolygons into Polygons
            # So we don't have regions that are far away being highlighted
            country_geoms = [gg for g in df.geometry.tolist() for gg in get_geoms(g)]
            yes_geom = [(g, 1) for g in country_geoms if g.intersects(geom)]
            no_geom = [(g, 2) for g in country_geoms if not g.intersects(geom)]
            tr = from_bounds(-180, -90, 180, 90, self.width, self.height)
            # Generate RGBA image using tab10 (blue, orange, and green)
            ar = (
                plt.cm.tab10(  # type: ignore
                    rasterize(
                        yes_geom + no_geom + [(geom.boundary, 3)],
                        out_shape=(self.height, self.width),
                        transform=tr,  # type: ignore
                    )
                )
                * 255
            ).astype(np.uint8)

            # Let's write a nice message 🙂
            img = Image.fromarray(ar)
            img_d = ImageDraw.Draw(img)
            # getbbox returns (left, top, right, bottom); the horizontal extent
            # is what centers the text, not the bottom coordinate.
            left, _, right, _ = img_d.getfont().getbbox(self.msg)
            offset = (self.width - (right - left)) // 2
            img_d.text((offset, 10), self.msg, fill=(255, 255, 255))
            # Get image into CHW array and pick RGB bands
            ar = np.array(img).transpose((2, 0, 1))[:3]

            # Write image to tiff file with the correct CRS and transform
            meta = {
                "driver": "GTiff",
                "dtype": "uint8",
                "width": self.width,
                "height": self.height,
                "count": 3,
                "crs": "epsg:4326",
                "transform": tr,
            }
            raster_guid = gen_guid()
            out_path = os.path.join(self.tmp_dir.name, f"{raster_guid}.tif")
            with rasterio.open(out_path, "w", **meta, **INT_COMPRESSION_KWARGS) as dst:
                dst.write(ar)
            asset = AssetVibe(out_path, "image/tiff", raster_guid)
            # Let's use the geometry and date from the input
            return {
                "raster": Raster.clone_from(
                    user_input,
                    id=gen_guid(),
                    assets=[asset],
                    # Channels were written in RGB order
                    bands={"red": 0, "green": 1, "blue": 2},
                )
            }

        return hello

    def __del__(self):
        self.tmp_dir.cleanup()
def fit_model_in_bulk(da: xr.Dataset) -> Tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Fit a per-pixel linear model over time.

    Args:
        da: Array-like with ``shape``, ``values`` and ``time.values``; the first
            axis is time.

    Returns:
        Tuple ``(trend, test_stat)``, both shaped like ``da.shape[1:]``. The
        trend is the slope per day; the test statistic is the slope divided by
        its estimated standard error.
    """
    B, A, ATAinv, beta_hat, trend = linear_fit_in_bulk(da)

    test_stat = compute_test_statistics(da, B, A, ATAinv, beta_hat)

    return trend, test_stat


def compute_test_statistics(
    da: xr.Dataset,
    B: NDArray[np.float64],
    A: NDArray[np.float64],
    ATAinv: NDArray[np.float64],
    beta_hat: NDArray[np.float64],
):
    """Compute the test statistic of the fitted slope for every pixel.

    Args:
        da: Source array, only used for its shape.
        B: Observations flattened to ``(time, pixels)``.
        A: Design matrix with columns ``(t, 1)``.
        ATAinv: Inverse of ``A.T @ A``.
        beta_hat: Fitted coefficients, one ``(slope, intercept)`` column per pixel.

    Returns:
        Array shaped like ``da.shape[1:]``; NaN where fewer than two samples are
        valid or the residual variance is zero or undefined.
    """
    # estimating test statistic for the trend
    n = np.sum(np.logical_not(np.isnan(B)).astype(int), axis=0)
    gamma = ATAinv[0, 0]
    # n - 2 can be zero or negative; those pixels are masked out below, so the
    # floating-point warnings carry no information.
    with np.errstate(divide="ignore", invalid="ignore"):
        sig_hat2 = np.nansum((B - A @ beta_hat) ** 2, axis=0) / (n - 2)
        maskout = np.logical_or(np.isnan(sig_hat2), sig_hat2 == 0)
        test_stat = beta_hat[0, :] / np.sqrt(
            np.where(np.logical_not(maskout), sig_hat2, 1.0) * gamma
        )

    # make sure we have at least two points to store trend
    test_stat = np.where(n > 1, test_stat, np.nan)

    test_stat = np.where(np.logical_not(maskout), test_stat, np.nan)

    test_stat = test_stat.reshape(da.shape[1:])
    return test_stat


def linear_fit_in_bulk(
    da: xr.Dataset,
) -> Tuple[
    NDArray[np.float64],
    NDArray[np.float64],
    NDArray[np.float64],
    NDArray[np.float64],
    NDArray[np.float64],
]:
    """Solve the normal equations of a linear fit for all pixels at once.

    Returns:
        Tuple ``(B, A, ATAinv, beta_hat, trend)``: the flattened observations,
        the design matrix, the inverse of ``A.T @ A``, the fitted coefficients
        and the slope reshaped to ``da.shape[1:]``. The slope is NaN for pixels
        with fewer than two valid (non-NaN) samples.
    """
    # fitting a linear model in bulk
    n = da.shape[0]
    B = da.values.reshape((n, -1))
    t = da.time.values
    if not isinstance(t[0], np.datetime64):
        t = np.array([x.to_datetime64() for x in da.time.values])
    # Time axis in days elapsed since the first observation
    t = (t - np.min(t)) / np.timedelta64(1, "D")
    A = np.stack((t, np.ones_like(t))).T
    ATAinv = np.linalg.inv(A.T @ A)

    # this is just A.T@B, but avoiding issues with nan, so that even if
    # one pixel/band has a nan in a given time we still estimate the trend
    # by ignoring the particular time (also in test statistic estimation)
    ATB = np.nansum(A.reshape(n, 2, 1) * B.reshape(n, 1, -1), axis=0)

    beta_hat = ATAinv @ ATB
    trend = beta_hat[0, :]

    # make sure we have at least two points to store trend: count the valid
    # samples per pixel, the same way the test statistic does, so that pixels
    # without data do not report a spurious trend of zero
    valid = np.sum(np.logical_not(np.isnan(B)), axis=0)
    trend = np.where(valid > 1, trend, np.nan)

    trend = trend.reshape(da.shape[1:])
    return B, A, ATAinv, beta_hat, trend


class CallbackBuilder:
    """Build the op callback computing the pixel-wise linear trend of a raster series."""

    def __init__(self):
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def linear_trend_callback(
            series: RasterChunk, rasters: List[Raster]
        ) -> Dict[str, RasterChunk]:
            da = read_chunk_series(series.limits, rasters)

            trend, test_stat = fit_model_in_bulk(da)

            # store results
            coords = {k: v for k, v in da.coords.items() if k != "time" and k != "band"}
            # Trend bands come first, followed by the test-statistic bands
            data = np.concatenate((trend, test_stat))
            res = xr.DataArray(data=data, dims=list(da.dims)[1:], coords=coords, attrs=da.attrs)
            asset = save_raster_to_asset(res, self.tmp_dir.name)
            bands: Dict[str, int] = {}
            for k, v in series.bands.items():
                bands[f"trend_{k}"] = int(v)
                bands[f"test_stat_{k}"] = int(v) + len(series.bands)
            res = RasterChunk(
                id=hashlib.sha256(f"linear_trend-{series.id}".encode()).hexdigest(),
                time_range=series.time_range,
                geometry=series.geometry,
                assets=[asset],
                bands=bands,
                chunk_pos=series.chunk_pos,
                num_chunks=series.num_chunks,
                limits=series.limits,
                write_rel_limits=series.write_rel_limits,
            )

            return {"trend": res}

        return linear_trend_callback

    def __del__(self):
        self.tmp_dir.cleanup()
def _one_test_fit_model_in_bulk(sy: int, sx: int, sz: int):
    """Check the fitted trend on a synthetic ``sz`` x ``sy`` x ``sx`` cube.

    Every pixel is a perfectly linear series sampled daily; the slopes grow
    linearly with the pixel index, from 0 to 1.
    """
    TOL = 1e-10
    n_pixels = sy * sx
    start = Timestamp(2001, 1, 1)  # type: ignore
    t = [start + Timedelta(days=d) for d in range(sz)]

    true_trend = [i / (n_pixels - 1) for i in range(n_pixels)]
    series = [np.linspace(0, h * (sz - 1), sz) for h in true_trend]

    # (pixel, time) -> (y, x, time) -> (time, y, x)
    fake_rasters = np.stack(series).reshape((sy, sx, sz)).transpose((2, 0, 1))
    expected = np.array(true_trend).reshape((sy, sx))

    da = xr.DataArray(data=fake_rasters, dims=["time", "y", "x"], coords={"time": t})

    trend_hat, _ = fit_model_in_bulk(da)  # type: ignore

    assert np.max(np.abs(trend_hat - expected)) < TOL


def test_fit_model_in_bulk():
    """Exercise every combination of the three sizes for y, x and time."""
    s = [32, 64, 128]
    shapes = [(sy, sx, sz) for sy in s for sx in s for sz in s]
    for sy, sx, sz in shapes:
        _one_test_fit_model_in_bulk(sy, sx, sz)
def convert_product(product: Dict[str, Any], geom: BaseGeometry) -> AirbusProduct:
    """Convert one search result into an ``AirbusProduct``.

    Args:
        product: Search result; must contain ``acquisitionDate``, ``geometry``
            and ``acquisitionIdentifier``. It is not modified.
        geom: Geometry stored on the resulting product.

    Returns:
        Product whose time range is the acquisition instant and whose
        ``extra_info`` holds the remaining fields of ``product``.
    """
    dt = datetime.fromisoformat(product["acquisitionDate"].replace("Z", "+00:00"))
    # Work on a shallow copy so the caller's search result stays intact
    extra_info = dict(product)
    # This is the geometry for the whole product
    extra_info["product_geometry"] = extra_info.pop("geometry")
    acquisition_id = extra_info.pop("acquisitionIdentifier")

    # Get actual bounds from the raster
    return AirbusProduct(
        id=gen_guid(),
        time_range=(dt, dt),
        geometry=shpg.mapping(geom),
        assets=[],
        acquisition_id=acquisition_id,
        extra_info=extra_info,
    )


class CallbackBuilder:
    """Build the op callback listing Airbus products for a geometry and time range.

    Args:
        api_key: Key passed to ``AirBusAPI``.
        constellations: Constellation names, converted to ``Constellation`` values.
        max_cloud_cover: Cloud-cover limit forwarded to the search query.
    """

    def __init__(
        self,
        api_key: str,
        constellations: List[str],
        max_cloud_cover: int,
    ):
        self.api_key = api_key
        self.constellations = [Constellation(c) for c in constellations]
        self.max_cloud_cover = max_cloud_cover
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def download_products(
            input_item: DataVibe,
        ) -> Dict[str, List[AirbusProduct]]:
            api = AirBusAPI(
                self.api_key,
                projected_crs=False,
                constellations=self.constellations,
            )
            geom = shpg.shape(input_item.geometry)

            search_results = api.query(
                geom, input_item.time_range, self.max_cloud_cover, my_workspace=False
            )

            return {"airbus_products": [convert_product(p, geom) for p in search_results]}

        return download_products

    def __del__(self):
        # Release the temporary directory created in __init__; the attribute is
        # missing if __init__ failed before creating it.
        tmp_dir = getattr(self, "tmp_dir", None)
        if tmp_dir is not None:
            tmp_dir.cleanup()
class CallbackBuilder:
    """Build the op callback listing ALOS forest products for a geometry and time range."""

    def __init__(self):
        pass

    def validate_item(self, item: Item):
        """Ensure ``item`` carries a dict geometry.

        Raises:
            ValueError: If the geometry is missing or is not a dict.
        """
        geometry = item.geometry
        if geometry is None:
            raise ValueError(f"Item {item.id} is missing geometry field")
        if not isinstance(geometry, dict):
            raise ValueError(f"Item {item.id} geometry is not a dict")

    def convert_product(self, item: Item) -> AlosProduct:
        """Translate a validated STAC item into an ``AlosProduct`` without assets."""
        self.validate_item(item)
        start, end = (parse(item.properties[key]) for key in ("start_datetime", "end_datetime"))
        return AlosProduct(
            id=item.id,
            geometry=cast(Dict[str, Any], item.geometry),
            time_range=(start, end),
            assets=[],
        )

    def __call__(self):
        def callback(input_data: DataVibe) -> Dict[str, List[AlosProduct]]:
            query_geometry = shpg.shape(input_data.geometry)
            items = AlosForestCollection().query(
                geometry=query_geometry, time_range=input_data.time_range
            )

            if items:
                return {"alos_products": [self.convert_product(i) for i in items]}

            raise ValueError(
                f"No items found for geometry {input_data.geometry} "
                f"and time range {input_data.time_range}"
            )

        return callback
callback_builder: CallbackBuilder +description: + short_description: Lists ALOS forest products for input geometry and time range. \ No newline at end of file diff --git a/ops/list_alos_products/test_alos_list.py b/ops/list_alos_products/test_alos_list.py new file mode 100644 index 00000000..9596a898 --- /dev/null +++ b/ops/list_alos_products/test_alos_list.py @@ -0,0 +1,92 @@ +import os +from datetime import datetime +from typing import Any, Dict, Tuple +from unittest.mock import Mock, patch + +import pytest +from pystac import Item + +from vibe_core.data import AlosProduct, DataVibe +from vibe_dev.testing.op_tester import OpTester + + +@pytest.fixture +def geometry(): + return { + "type": "Polygon", + "coordinates": [ + [ + [-86.773827, 14.575498], + [-86.770459, 14.579301], + [-86.764283, 14.575102], + [-86.769591, 14.567595], + [-86.773827, 14.575498], + ] + ], + } + + +@pytest.fixture +def time_range(): + return (datetime(2019, 1, 1), datetime(2020, 1, 1)) + + +@pytest.fixture +def data_vibe(geometry: Dict[str, Any], time_range: Tuple[datetime, datetime]): + return DataVibe( + id=str("test_id"), + time_range=time_range, + geometry=geometry, + assets=[], + ) + + +def expected_items(geometry: Dict[str, Any], time_range: Tuple[datetime, datetime]): + bbox = [-87.0, 14.0, -86.0, 15.0] + first_item = Item( + id="N15W087_20_FNF", + geometry=geometry, + bbox=bbox, + datetime=time_range[0], + properties={ + "start_datetime": time_range[0].strftime("%Y-%m-%d"), + "end_datetime": time_range[0].strftime("%Y-%m-%d"), + }, + ) + second_item = Item( + id="N15W087_19_FNF", + geometry=geometry, + bbox=bbox, + datetime=time_range[1], + properties={ + "start_datetime": time_range[1].strftime("%Y-%m-%d"), + "end_datetime": time_range[1].strftime("%Y-%m-%d"), + }, + ) + return [first_item, second_item] + + +@patch("vibe_lib.planetary_computer.AlosForestCollection.query") +def test_alos_list(query: Mock, data_vibe: DataVibe): + mock_items = 
expected_items(geometry=data_vibe.geometry, time_range=data_vibe.time_range) + query.return_value = mock_items + config_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "list_alos_products.yaml" + ) + + op = OpTester(config_path) + output_data = op.run(**{"input_data": data_vibe}) + assert output_data + + assert "alos_products" in output_data + products = output_data["alos_products"] + + # Check variable products is a list of AlosProduct + assert isinstance(products, list) + assert len(products) == len(mock_items) + for item, product in zip(mock_items, products): + assert isinstance(product, AlosProduct) + assert product.id == item.id + assert product.geometry == item.geometry + assert product.time_range == (item.datetime, item.datetime) + assert product.assets == [] diff --git a/ops/list_bing_maps/list_bing_maps.py b/ops/list_bing_maps/list_bing_maps.py new file mode 100644 index 00000000..766e6090 --- /dev/null +++ b/ops/list_bing_maps/list_bing_maps.py @@ -0,0 +1,71 @@ +import hashlib +from datetime import datetime +from typing import Dict, List, Optional + +from pystac.item import Item + +from vibe_core.data import DataVibe +from vibe_core.data.products import BingMapsProduct +from vibe_lib.bing_maps import MAX_ZOOM_LEVEL, MIN_ZOOM_LEVEL, BingMapsCollection + + +class CallbackBuilder: + def __init__( + self, + api_key: str, + zoom_level: int, + imagery_set: str, + map_layer: str, + orientation: Optional[float], + ): + if not api_key: + raise ValueError("BingMaps API key was not provided.") + if imagery_set != "Aerial": + raise ValueError("Only imagery set 'Aerial' is supported.") + if map_layer != "Basemap": + raise ValueError("Only map layer 'Basemap' is supported.") + if orientation is not None: + raise ValueError("Setting an orientation is currently not supported.") + if zoom_level < MIN_ZOOM_LEVEL or zoom_level > MAX_ZOOM_LEVEL: + raise ValueError( + f"Zoom level must be within [{MIN_ZOOM_LEVEL}, {MAX_ZOOM_LEVEL}]. 
" + f"Found {zoom_level}." + ) + + self.collection = BingMapsCollection(api_key) + self.zoom_level = zoom_level + self.imagery_set = imagery_set + self.map_layer = map_layer + self.orientation = 0.0 if orientation is None else orientation + + def convert_product(self, item: Item) -> BingMapsProduct: + assert item.geometry is not None, "input Item has no geometry" + + product = BingMapsProduct( + id=hashlib.sha256( + (f"bingmaps-{item.id}-{self.imagery_set}-{self.map_layer}").encode() + ).hexdigest(), + time_range=(datetime.now(), datetime.now()), + geometry=item.geometry, + assets=[], + url=item.properties["url"], + zoom_level=self.zoom_level, + imagery_set=self.imagery_set, + map_layer=self.map_layer, + orientation=self.orientation, + ) + return product + + def __call__(self): + def list_bing_maps( + user_input: DataVibe, + ) -> Dict[str, List[BingMapsProduct]]: + items = self.collection.query_tiles(user_input.bbox, self.zoom_level) + + if not items: + raise RuntimeError("No products found for input geometry and zoom level.") + + products = [self.convert_product(item) for item in items] + return {"products": products} + + return list_bing_maps diff --git a/ops/list_bing_maps/list_bing_maps.yaml b/ops/list_bing_maps/list_bing_maps.yaml new file mode 100644 index 00000000..d97eb456 --- /dev/null +++ b/ops/list_bing_maps/list_bing_maps.yaml @@ -0,0 +1,44 @@ +name: list_bing_maps +inputs: + user_input: DataVibe +output: + products: List[BingMapsProduct] +parameters: + api_key: + zoom_level: 10 + imagery_set: Aerial + map_layer: Basemap + orientation: null +dependencies: + parameters: + - zoom_level + - imagery_set + - map_layer + - orientation +entrypoint: + file: list_bing_maps.py + callback_builder: CallbackBuilder +description: + short_description: + Lists BingMaps basemap tile products intersecting the input geometry for a given `zoom_level`. + inputs: + user_input: Geometry of interest. + output: + products: Listed products. 
+ parameters: + api_key: Required BingMaps API key. + zoom_level: >- + Zoom level of interest, ranging from 0 to 20. For instance, a zoom level of 1 corresponds to + a resolution of 78271.52 m/pixel, a zoom level of 10 corresponds to 152.9 m/pixel, and a zoom + level of 19 corresponds to 0.3 m/pixel. For more information on zoom levels and their + corresponding scale and resolution, please refer to the BingMaps API documentation + at https://learn.microsoft.com/en-us/bingmaps/articles/understanding-scale-and-resolution + imagery_set: >- + The type of imagery. Currently only supporting 'Aerial'. + map_layer: >- + The display layer that renders on top of the imagery set. Currently only supporting 'Basemap'. + orientation: >- + The orientation of the viewport to use for the imagery metadata. + Currently unused and set to None. + + diff --git a/ops/list_bing_maps/test_list_bing_maps.py b/ops/list_bing_maps/test_list_bing_maps.py new file mode 100644 index 00000000..86cd9bcc --- /dev/null +++ b/ops/list_bing_maps/test_list_bing_maps.py @@ -0,0 +1,145 @@ +import os +from datetime import datetime +from typing import List, Optional, cast +from unittest.mock import MagicMock, patch + +import pytest +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import DataVibe +from vibe_core.data.products import BingMapsProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.bing_maps import BingMapsCollection + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_bing_maps.yaml") +FAKE_TIME_RANGE = (datetime.now(), datetime.now()) + +# Geometries +WORLD_GEOMETRY = box(-180, -90, 180, 90) +WESTERN_HEMISPHERE_GEOMETRY = box(-180, -90, -0.00001, 90) +EASTERN_HEMISPHERE_GEOMETRY = box(0.00001, -90, 180, 90) +NORTHERN_HEMISPHERE_GEOMETRY = box(-180, 0.00001, 180, 90) +SOUTHERN_HEMISPHERE_GEOMETRY = box(-180, -90, 180, -0.00001) +QUARTER_WORLD_CENTERED_GEOMETRY = box(-89.99999, -44.99999, 89.99999, 44.99999) + +FIELD_GEOMETRY 
= Polygon( + [ + (-118.940490, 46.998848), + (-118.876148, 46.998848), + (-118.876148, 47.013422), + (-118.940490, 47.013422), + ] +) + + +@pytest.mark.parametrize( + "input_geometry, zoom_level, num_tiles", + [ # Whole world geometry + (WORLD_GEOMETRY, zoom_level, n_tiles) + for zoom_level, n_tiles in [(1, 4), (2, 16), (3, 64), (5, 1024), (7, 16384)] + ] + + [ # Half world geometries + (geom, zoom_level, n_tiles) + for geom in [ + WESTERN_HEMISPHERE_GEOMETRY, + EASTERN_HEMISPHERE_GEOMETRY, + NORTHERN_HEMISPHERE_GEOMETRY, + SOUTHERN_HEMISPHERE_GEOMETRY, + ] + for zoom_level, n_tiles in [(1, 2), (2, 8), (3, 32), (5, 512), (7, 8192)] + ] + + [ # Quarter world geometry + (QUARTER_WORLD_CENTERED_GEOMETRY, zoom_level, n_tiles) + for zoom_level, n_tiles in [(1, 4), (2, 4), (3, 16), (5, 160), (7, 2304)] + ] + + [ # Small field geometry + (FIELD_GEOMETRY, zoom_level, n_tiles) + for zoom_level, n_tiles in [ + (1, 1), + (10, 1), + (12, 2), + (14, 8), + (15, 21), + (18, 816), + ] + ], +) +@patch.object( + BingMapsCollection, + "get_download_url_and_subdomains", + return_value=( + "fake_download_url_{subdomain}_{quadkey}_{api_key}", + ["fake_subdomain"], + ), +) +@patch("vibe_lib.bing_maps.tile_is_available", return_value=True) +def test_list_bing_maps( + _: MagicMock, + __: MagicMock, + input_geometry: Polygon, + zoom_level: int, + num_tiles: int, +): + user_input = DataVibe("user_input", FAKE_TIME_RANGE, mapping(input_geometry), []) + + op_tester = OpTester(CONFIG_PATH) + op_tester.update_parameters( + { + "api_key": "valid_fake_api_key", + "zoom_level": zoom_level, + "imagery_set": "Aerial", + "map_layer": "Basemap", + "orientation": None, + } + ) + output_data = op_tester.run(user_input=user_input) + + # Get op result + output_name = "products" + assert output_name in output_data + output_product = output_data[output_name] + assert isinstance(output_product, list) + assert len(cast(List[BingMapsProduct], output_data["products"])) == num_tiles + + +@pytest.mark.parametrize( 
+ "zoom_level, api_key, imagery_set, map_layer, orientation", + [ + # Invalid api_key + (10, "", "Aerial", "Basemap", None), + (10, None, "Aerial", "Basemap", None), + # Invalid zoom_level + (0, "valid_fake_api_key", "Aerial", "Basemap", None), + (21, "valid_fake_api_key", "Aerial", "Basemap", None), + # Invalid imagery_set + (10, "valid_fake_api_key", "invalid_imagery_set", "Basemap", None), + # Invalid map_layer + (10, "valid_fake_api_key", "Aerial", "invalid_map_layer", None), + # Invalid orientation + (10, "valid_fake_api_key", "Aerial", "Basemap", -1), + (10, "valid_fake_api_key", "Aerial", "Basemap", 180), + (10, "valid_fake_api_key", "Aerial", "Basemap", 380), + ], +) +def test_invalid_parameters( + zoom_level: int, + api_key: str, + imagery_set: str, + map_layer: str, + orientation: Optional[float], +): + user_input = DataVibe("user_input", FAKE_TIME_RANGE, mapping(FIELD_GEOMETRY), []) + + op_tester = OpTester(CONFIG_PATH) + + op_tester.update_parameters( + { + "api_key": api_key, + "zoom_level": zoom_level, + "imagery_set": imagery_set, + "map_layer": map_layer, + "orientation": orientation, + } + ) + with pytest.raises(ValueError): + op_tester.run(user_input=user_input) diff --git a/ops/list_cdl_products/list_cdl_products.py b/ops/list_cdl_products/list_cdl_products.py new file mode 100644 index 00000000..59caf831 --- /dev/null +++ b/ops/list_cdl_products/list_cdl_products.py @@ -0,0 +1,65 @@ +# This op receives a date range and geometry and list the respective CDL products +from datetime import datetime +from typing import Dict, List + +from shapely import geometry as shpg +from shapely import wkt + +from vibe_core.data import DataVibe +from vibe_core.data.core_types import gen_hash_id +from vibe_core.data.products import CDL_DOWNLOAD_URL, CDLProduct +from vibe_core.file_downloader import verify_url + + +def check_cdl_for_year(year: int) -> bool: + """Verify if there is a CDL file available for that year""" + url = CDL_DOWNLOAD_URL.format(year) + return 
verify_url(url) + + +class CallbackBuilder: + def __init__(self, cdl_geometry_wkt: str): + with open(cdl_geometry_wkt, "r") as wkt_file: + self.cdl_geometry = wkt.load(wkt_file) + + def convert_product(self, year: int) -> CDLProduct: + """Given the year, builds the CDLProduct""" + + start_date = datetime(year, 1, 1) + end_date = datetime(year, 12, 31) + time_range = (start_date, end_date) + + cdl_geom = shpg.mapping(self.cdl_geometry) + + product = CDLProduct( + id=gen_hash_id(f"cdl_product_{year}", cdl_geom, time_range), + time_range=time_range, + geometry=cdl_geom, + assets=[], + ) + + return product + + def __call__(self): + def list_cdl_products(input_item: DataVibe) -> Dict[str, List[CDLProduct]]: + """List all years for the input time range and create a product for each of them""" + + # Verify if input geometry intersects with cdl geometry + input_geom = shpg.shape(input_item.geometry) + if input_geom.intersects(self.cdl_geometry): + # List all years + start_date, end_date = input_item.time_range + input_years = range(start_date.year, end_date.year + 1) + + # Create a product for each year that has a CDL map available + products = [ + self.convert_product(year) for year in input_years if check_cdl_for_year(year) + ] + else: + raise ValueError( + "Input geometry does not intersect with CDL coverage area (continental US)." 
+ ) + + return {"cdl_products": products} + + return list_cdl_products diff --git a/ops/list_cdl_products/list_cdl_products.yaml b/ops/list_cdl_products/list_cdl_products.yaml new file mode 100644 index 00000000..c7643a0c --- /dev/null +++ b/ops/list_cdl_products/list_cdl_products.yaml @@ -0,0 +1,13 @@ +name: list_cdl_products +inputs: + input_item: DataVibe +output: + cdl_products: List[CDLProduct] +parameters: + cdl_geometry_wkt: /opt/terravibes/ops/resources/cdl_metadata/us_continental.wkt +entrypoint: + file: list_cdl_products.py + callback_builder: CallbackBuilder +description: + short_description: + Lists all years for the input time range and creates a product for each of them to be downloaded. \ No newline at end of file diff --git a/ops/list_chirps/list_chirps.py b/ops/list_chirps/list_chirps.py new file mode 100644 index 00000000..abb41701 --- /dev/null +++ b/ops/list_chirps/list_chirps.py @@ -0,0 +1,214 @@ +import hashlib +from calendar import monthrange +from datetime import datetime, timedelta, timezone +from typing import Any, Dict, List, Optional, Tuple, Union + +import pytz +import rasterio +import requests +from dateutil.parser import isoparse +from dateutil.relativedelta import relativedelta +from pystac import MediaType +from pystac.asset import Asset +from pystac.item import Item +from shapely import geometry as shpg +from shapely.geometry import Polygon, mapping + +from vibe_core.data import ChirpsProduct, DataVibe +from vibe_core.data.core_types import BBox + + +class ChirpsCollection: + INI = datetime(1981, 1, 1, tzinfo=timezone.utc) # first day Chirps is available + VALID_FREQ = {"daily", "monthly"} + VALID_RES = {"p05", "p25"} + + def __init__(self, freq: str, res: str): + if freq not in self.VALID_FREQ: + raise ValueError( + f"Invalid Chirps frequency {freq} - valid options are {','.join(self.VALID_FREQ)}" + ) + if res not in self.VALID_RES: + raise ValueError( + f"Invalid Chirps resolution {res} - valid options are 
{','.join(self.VALID_RES)}" + ) + if freq == "monthly" and res != "p05": + raise ValueError("Monthly Chirps is only available on p05 resolution") + + self.freq = freq + self.res = res + self.end = self.get_latest_chirps() + # all bbox are the same, so we pick from the latest file + self.bbox, self.footprint = self.get_bbox_and_footprint(self.end) + self.var = "precipitation" + + def url(self, year: int) -> str: + if self.freq == "monthly": + return "https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_monthly/cogs/" + else: + return ( + f"https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/" + f"cogs/{self.res}/{year}/" + ) + + def fname(self, date: datetime) -> str: + if self.freq == "monthly": + return f"chirps-v2.0.{date.year}.{date.month:02}.cog" + else: + return f"chirps-v2.0.{date.year}.{date.month:02}.{date.day:02}.cog" + + def get_latest_chirps(self) -> datetime: + ini = self.INI + end = datetime( + datetime.today().year, + datetime.today().month, + datetime.today().day, + tzinfo=timezone.utc, + ) + date = end + for year in range(end.year, ini.year - 1, -1): + text = requests.get(self.url(year)).text + while date >= datetime(year, 1, 1, tzinfo=timezone.utc): + if text.find(self.fname(date)) > 0: + return date + if self.freq == "daily": + date -= timedelta(days=1) + else: + date -= relativedelta(months=1) + date = date.replace(day=monthrange(date.year, date.month)[1]) + date = datetime(year - 1, 12, 31, tzinfo=timezone.utc) + raise ValueError("no Chirps file found") # this point should never be reached + + def get_bbox_and_footprint(self, date: datetime) -> Tuple[BBox, Polygon]: + url = self.url(date.year) + self.fname(date) + with rasterio.open(url) as ds: + bounds = ds.bounds + bbox = (bounds.left, bounds.bottom, bounds.right, bounds.top) + footprint = shpg.box(*bounds) + return (bbox, footprint) + + def get_chirps_list( + self, time_range: Tuple[datetime, datetime] + ) -> List[Tuple[datetime, str, str]]: + tr = [dt.astimezone(pytz.timezone("UTC")) 
for dt in time_range] + end_range = ( + tr[1] + if self.freq == "daily" + else tr[1].replace(day=monthrange(tr[1].year, tr[1].month)[1]) + ) + if ( + time_range[1].timestamp() < self.INI.timestamp() + or time_range[0].timestamp() > self.end.timestamp() + ): + raise ValueError( + f"Invalid time range {time_range[0].isoformat()} - " + f"{time_range[1].isoformat()} - valid values are in the range" + f"{self.INI.isoformat()} - {self.end.isoformat()}" + ) + ini = tr[0] if tr[0] >= self.INI else self.INI + end = end_range if end_range <= self.end else self.end + date = end + res = [] + while date >= ini: + url = self.url(date.year) + self.fname(date) + fname = self.fname(date) + res.append((date, url, fname)) + if self.freq == "daily": + date -= timedelta(days=1) + else: + date -= relativedelta(months=1) + date = date.replace(day=monthrange(date.year, date.month)[1]) + return res + + def _get_id(self, fname: str) -> str: + return hashlib.sha256(f"{self.res}_{fname}".encode()).hexdigest() + + def query( + self, + roi: Optional[BBox] = None, + time_range: Optional[Tuple[datetime, datetime]] = None, + ids: Optional[List[str]] = None, + ) -> List[Item]: + if roi is not None: + pgon = shpg.box(*roi) + if not pgon.intersects(self.footprint): + return [] + ini = time_range[0] if time_range is not None else self.INI + end = time_range[1] if time_range is not None else self.end + chirpsl = self.get_chirps_list((ini, end)) + res = [] + for date, url, fname in chirpsl: + id = self._get_id(fname) + if ids is not None and id not in ids: + continue + item = self._create_item(date, url, id) + res.append(item) + return res + + def _create_item(self, date: datetime, url: str, id: str) -> Item: + item = Item( + id=id, + geometry=mapping(self.footprint), + bbox=[self.bbox[i] for i in range(4)], + datetime=date, + properties={}, + ) + asset = Asset(href=url, media_type=MediaType.COG) + item.add_asset(self.var, asset) + return item + + def query_by_id(self, id: Union[str, List[str]]) -> 
List[Item]: + if isinstance(id, str): + ids = [id] + else: + ids = id + res = [] + for date, url, fname in self.get_chirps_list((self.INI, self.end)): + id = self._get_id(fname) + if id in ids: + item = self._create_item(date, url, id) + res.append(item) + return res + + +def convert_product(item: Dict[str, Any], freq: str) -> ChirpsProduct: + date = isoparse(item["properties"]["datetime"]).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + if freq == "daily": + time_range = (date, date) + else: + time_range = (date.replace(day=1), date) + url = item["assets"]["precipitation"]["href"] + output = ChirpsProduct( + id=item["id"], + time_range=time_range, + geometry=item["geometry"], + assets=[], + url=url, + ) + return output + + +class CallbackBuilder: + def __init__(self, freq: str, res: str): + self.freq = freq + self.res = res + + def __call__(self): + def list_chirps( + input_item: DataVibe, + ) -> Dict[str, List[ChirpsProduct]]: + collection = ChirpsCollection(self.freq, self.res) + items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) + + products = [convert_product(item.to_dict(), freq=self.freq) for item in items] + + if not products: + raise RuntimeError( + f"No product found for time range {input_item.time_range} " + f"and geometry {input_item.geometry}" + ) + return {"chirps_products": products} + + return list_chirps diff --git a/ops/list_chirps/list_chirps.yaml b/ops/list_chirps/list_chirps.yaml new file mode 100644 index 00000000..c191aa1c --- /dev/null +++ b/ops/list_chirps/list_chirps.yaml @@ -0,0 +1,23 @@ +name: list_chips +inputs: + input_item: DataVibe +output: + chirps_products: List[ChirpsProduct] +parameters: + freq: daily + res: p05 +dependencies: + parameters: + - freq + - res +entrypoint: + file: list_chirps.py + callback_builder: CallbackBuilder +description: + short_description: + Lists products from the CHIRPS dataset with desired frequency and resolution + for input geometry and time range. 
+ parameters: + freq: daily or monthly frequencies + res: p05 for 0.05 degree resolution or p25 for 0.25 degree resolution, + p25 is only available daily \ No newline at end of file diff --git a/ops/list_climatology_lab/list_climatology_lab.py b/ops/list_climatology_lab/list_climatology_lab.py new file mode 100644 index 00000000..7c87cea3 --- /dev/null +++ b/ops/list_climatology_lab/list_climatology_lab.py @@ -0,0 +1,61 @@ +from datetime import datetime +from typing import Dict, List + +from pystac.item import Item + +from vibe_core.data import DataVibe +from vibe_core.data.products import ClimatologyLabProduct +from vibe_lib.climatology_lab import ( + ClimatologyLabCollection, + GridMETCollection, + TerraClimateCollection, +) + + +class CallbackBuilder: + collection: ClimatologyLabCollection + + def __init__(self, variable: str): + if variable not in self.collection.asset_keys: + raise ValueError( + f"Requested variable '{variable}' not valid.\n" + f"Available properties: {', '.join(self.collection.asset_keys)}" + ) + self.variable = variable + + def convert_product(self, item: Item) -> ClimatologyLabProduct: + assert item.geometry is not None, "input Item has no geometry" + assert item.datetime is not None, "input Item has no datetime" + time_range = (datetime(item.datetime.year, 1, 1), datetime(item.datetime.year, 12, 31)) + + product = ClimatologyLabProduct( + id=item.id, + time_range=time_range, + geometry=item.geometry, + assets=[], + url=item.properties["url"], + variable=item.properties["variable"], + ) + return product + + def __call__(self): + def list_climatology_lab( + input_item: DataVibe, + ) -> Dict[str, List[ClimatologyLabProduct]]: + items = self.collection.query(variable=self.variable, time_range=input_item.time_range) + + if not items: + raise RuntimeError(f"No products found for time range {input_item.time_range}") + + products = [self.convert_product(item) for item in items] + return {"products": products} + + return list_climatology_lab + + 
+class CallbackBuilderGridMET(CallbackBuilder): + collection = GridMETCollection() + + +class CallbackBuilderTerraClimate(CallbackBuilder): + collection = TerraClimateCollection() diff --git a/ops/list_climatology_lab/list_gridmet.yaml b/ops/list_climatology_lab/list_gridmet.yaml new file mode 100644 index 00000000..0b23049e --- /dev/null +++ b/ops/list_climatology_lab/list_gridmet.yaml @@ -0,0 +1,39 @@ +name: list_gridmet +inputs: + input_item: DataVibe +output: + products: List[ClimatologyLabProduct] +parameters: + variable: pr +dependencies: + parameters: + - variable +entrypoint: + file: list_climatology_lab.py + callback_builder: CallbackBuilderGridMET +description: + short_description: + Lists GridMET products of `variable` from years intersecting with input time range. + inputs: + input_item: Time range of interest. + output: + products: Listed products. + parameters: + variable: >- + Options are: + bi - Burning Index + erc - Energy Release Component + etr - Daily reference evapotranspiration (alfalfa, units = mm) + fm100 - Fuel Moisture (100-hr, units = %) + fm1000 - Fuel Moisture (1000-hr, units = %) + pet - Potential evapotranspiration (reference grass evapotranspiration, units = mm) + pr - Precipitation amount (daily total, units = mm) + rmax - Maximum relative humidity (units = %) + rmin - Minimum relative humidity (units = %) + sph - Specific humidity (units = kg/kg) + srad - Downward surface shortwave radiation (units = W/m^2) + th - Wind direction (degrees clockwise from North) + tmmn - Minimum temperature (units = K) + tmmx - Maximum temperature (units = K) + vpd - Vapor Pressure Deficit (units = kPa) + vs - Wind speed at 10m (units = m/s) diff --git a/ops/list_climatology_lab/list_terraclimate.yaml b/ops/list_climatology_lab/list_terraclimate.yaml new file mode 100644 index 00000000..5cba8935 --- /dev/null +++ b/ops/list_climatology_lab/list_terraclimate.yaml @@ -0,0 +1,37 @@ +name: list_terraclimate +inputs: + input_item: DataVibe +output: +
products: List[ClimatologyLabProduct] +parameters: + variable: tmax +dependencies: + parameters: + - variable +entrypoint: + file: list_climatology_lab.py + callback_builder: CallbackBuilderTerraClimate +description: + short_description: + Lists TerraClimate products of `variable` from years intersecting with input time range. + inputs: + input_item: Time range of interest. + output: + products: Listed products. + parameters: + variable: >- + Options are: + aet - Actual Evapotranspiration (monthly total, units = mm) + def - Climate Water Deficit (monthly total, units = mm) + pet - Potential evapotranspiration (monthly total, units = mm) + ppt - Precipitation (monthly total, units = mm) + q - Runoff (monthly total, units = mm) + soil - Soil Moisture (total column at end of month, units = mm) + srad - Downward surface shortwave radiation (units = W/m2) + swe - Snow water equivalent (at end of month, units = mm) + tmax - Max Temperature (average for month, units = C) + tmin - Min Temperature (average for month, units = C) + vap - Vapor pressure (average for month, units = kPa) + ws - Wind speed (average for month, units = m/s) + vpd - Vapor Pressure Deficit (average for month, units = kPa) + PDSI - Palmer Drought Severity Index (at end of month, units = unitless) diff --git a/ops/list_climatology_lab/test_list_climatology_lab.py b/ops/list_climatology_lab/test_list_climatology_lab.py new file mode 100644 index 00000000..709d7f0f --- /dev/null +++ b/ops/list_climatology_lab/test_list_climatology_lab.py @@ -0,0 +1,64 @@ +import os +from datetime import datetime, timezone +from typing import List, cast +from unittest.mock import MagicMock, patch + +import pytest +from shapely.geometry import Point, mapping + +from vibe_core.data import DataVibe +from vibe_core.data.products import ClimatologyLabProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.climatology_lab import ( + ClimatologyLabCollection, + GridMETCollection, + TerraClimateCollection, +) + 
+TERRACLIMATE_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "list_terraclimate.yaml" +) +GRIDMET_CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_gridmet.yaml") + +FAKE_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) +FAKE_TIME_RANGE = ( + datetime(year=2019, month=1, day=1, tzinfo=timezone.utc), + datetime(year=2020, month=12, day=31, tzinfo=timezone.utc), +) + +INVALID_VARIABLE = "🙅" + + +@pytest.mark.parametrize( + "config_path, variable", + [ + (p, v) + for p, c in [ + (TERRACLIMATE_CONFIG_PATH, TerraClimateCollection), + (GRIDMET_CONFIG_PATH, GridMETCollection), + ] + for v in c.asset_keys + ], +) +@patch.object(ClimatologyLabCollection, "check_url_variable_year", return_value=True) +def test_gridmet_op(_: MagicMock, config_path: str, variable: str): + input_item = DataVibe("input_item", FAKE_TIME_RANGE, mapping(FAKE_GEOMETRY), []) + + op_tester = OpTester(config_path) + op_tester.update_parameters({"variable": variable}) + output_data = op_tester.run(input_item=input_item) + + # Get op result + output_name = "products" + assert output_name in output_data + output_product = output_data[output_name] + assert isinstance(output_product, list) + assert len(cast(List[ClimatologyLabProduct], output_data["products"])) == 2 + + +@pytest.mark.parametrize("config_path", [TERRACLIMATE_CONFIG_PATH, GRIDMET_CONFIG_PATH]) +def test_op_fails_invalid_variable(config_path: str): + op_tester = OpTester(config_path) + op_tester.update_parameters({"variable": INVALID_VARIABLE}) + with pytest.raises(ValueError): + op_tester.run(input_item=[]) diff --git a/ops/list_dem_products/list_dem_products.py b/ops/list_dem_products/list_dem_products.py new file mode 100644 index 00000000..2405e76c --- /dev/null +++ b/ops/list_dem_products/list_dem_products.py @@ -0,0 +1,53 @@ +# This operator receives a region and obtains the digital elevation model +# items associated with the input region. 
The collection 3dep-seamless +# only covers CONUS (continental us) and contains tiles with distinct +# spatial resolutions (10 and 30 meters). This operator returns a list of +# DemProduct. +from functools import partial +from typing import Any, Dict, List + +from dateutil.parser import isoparse +from shapely import geometry as shpg +from shapely import ops as shpo + +from vibe_core.data import DataVibe, DemProduct +from vibe_lib.planetary_computer import validate_dem_provider + + +def convert_product(item: Dict[str, Any], provider: str) -> DemProduct: + date = isoparse(item["properties"]["datetime"]) + output = DemProduct( + id=str(item["id"]), + time_range=(date, date), + geometry=item["geometry"], + assets=[], + tile_id=str(item["id"]), + resolution=int(item["properties"]["gsd"]), + provider=provider, + ) + + return output + + +def list_dem_products( + input_items: List[DataVibe], resolution: int, provider: str +) -> Dict[str, List[DemProduct]]: + collection = validate_dem_provider(provider.upper(), resolution) + + geom = shpo.unary_union([shpg.shape(i.geometry) for i in input_items]) + items = collection.query(geometry=geom) + + products = [ + convert_product(item.to_dict(), provider) + for item in items + if item.properties["gsd"] == resolution + ] + + if not products: + raise RuntimeError("No product found on provider '{provider}' for geometry {geom}") + + return {"dem_products": products} + + +def callback_builder(resolution: int, provider: str): + return partial(list_dem_products, resolution=resolution, provider=provider) diff --git a/ops/list_dem_products/list_dem_products.yaml b/ops/list_dem_products/list_dem_products.yaml new file mode 100644 index 00000000..78863278 --- /dev/null +++ b/ops/list_dem_products/list_dem_products.yaml @@ -0,0 +1,17 @@ +name: list_dem_products +inputs: + input_items: List[DataVibe] +output: + dem_products: List[DemProduct] +parameters: + resolution: 10 + provider: "USGS3Dep" +entrypoint: + file: list_dem_products.py + 
callback_builder: callback_builder +dependencies: + parameters: + - resolution + - provider +description: + short_description: Lists digital elevation map tiles that intersect with the input geometry and time range. \ No newline at end of file diff --git a/ops/list_dem_products/test_list_dem_products.py b/ops/list_dem_products/test_list_dem_products.py new file mode 100644 index 00000000..64d04bb2 --- /dev/null +++ b/ops/list_dem_products/test_list_dem_products.py @@ -0,0 +1,36 @@ +import os +from datetime import datetime, timezone +from typing import List, cast + +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import DataVibe, DemProduct +from vibe_core.data.core_types import BaseVibe +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_dem_products.yaml") + + +def test_op(): + latitude = 44.0005556 + longitude = -97.0005556 + buffer = 0.1 + bbox = [ + longitude - buffer, + latitude - buffer, + longitude + buffer, + latitude + buffer, + ] + polygon: Polygon = box(*bbox, ccw=True) + start_date = datetime(year=2018, month=2, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) + input_items = [DataVibe("input_item", (start_date, end_date), mapping(polygon), [])] + + output_data = OpTester(CONFIG_PATH).run(input_items=cast(List[BaseVibe], input_items)) + + # Get op result + output_name = "dem_products" + assert output_name in output_data + output_product = output_data[output_name] + assert isinstance(output_product, list) + assert len(cast(List[DemProduct], output_data["dem_products"])) == 4 diff --git a/ops/list_era5/list_era5.py b/ops/list_era5/list_era5.py new file mode 100644 index 00000000..6535efb5 --- /dev/null +++ b/ops/list_era5/list_era5.py @@ -0,0 +1,67 @@ +from functools import partial +from typing import Any, Dict, List + +from dateutil.parser import isoparse +from shapely import geometry as shpg +from 
# Maps the short variable codes accepted by the op to the asset names that are
# looked up on the queried items.
VARS = {
    "msl": "air_pressure_at_mean_sea_level",
    "2t": "air_temperature_at_2_metres",
    "mx2t": "air_temperature_at_2_metres_1hour_Maximum",
    "mn2t": "air_temperature_at_2_metres_1hour_Minimum",
    "2d": "dew_point_temperature_at_2_metres",
    "100u": "eastward_wind_at_100_metres",
    "10u": "eastward_wind_at_10_metres",
    "ssrd": (
        "integral_wrt_time_of_surface_direct_downwelling"
        "_shortwave_flux_in_air_1hour_Accumulation"
    ),
    "100v": "northward_wind_at_100_metres",
    "10v": "northward_wind_at_10_metres",
    "tp": "precipitation_amount_1hour_Accumulation",
    "sst": "sea_surface_temperature",
    "sp": "surface_air_pressure",
}


def convert_product(item: Dict[str, Any], var: str) -> Era5Product:
    """Build an ``Era5Product`` for variable code ``var`` from an item dictionary.

    The time range comes from the item's ``start_datetime``/``end_datetime``
    properties, and the geometry is the box spanned by the ``lon`` and ``lat``
    extents found under ``cube:dimensions``. The product id is the item id
    suffixed with the variable code; no assets are attached.
    """
    properties = item["properties"]
    time_range = (
        isoparse(properties["start_datetime"]),
        isoparse(properties["end_datetime"]),
    )
    lon_extent = properties["cube:dimensions"]["lon"]["extent"]
    lat_extent = properties["cube:dimensions"]["lat"]["extent"]
    footprint = shpg.box(lon_extent[0], lat_extent[0], lon_extent[1], lat_extent[1])

    return Era5Product(
        id=f"{item['id']}_{var}",
        time_range=time_range,
        geometry=mapping(footprint),
        assets=[],
        item_id=str(item["id"]),
        var=VARS[var],
    )


def list_era5(input_item: DataVibe, variable: str) -> Dict[str, List[Era5Product]]:
    """List the ERA5 products holding ``variable`` for the input item.

    The collection is queried with the bounding box and time range of
    ``input_item``; only items that expose the asset mapped to ``variable``
    are converted.

    Raises:
        ValueError: If ``variable`` is not one of the codes in ``VARS``.
        RuntimeError: If no item with the requested asset is found.
    """
    if variable not in VARS:
        raise ValueError(
            f"Requested variable '{variable}' not valid. "
            f"Valid values are {', '.join(VARS)}"
        )

    asset_name = VARS[variable]
    collection = Era5Collection()
    queried = collection.query(roi=input_item.bbox, time_range=input_item.time_range)
    products = [
        convert_product(entry.to_dict(), variable)
        for entry in queried
        if asset_name in entry.assets.keys()
    ]
    if products:
        return {"era5_products": products}

    raise RuntimeError(
        f"No product found for time range {input_item.time_range} "
        f"and geometry {input_item.geometry}"
    )


def callback_builder(variable: str):
    """Return ``list_era5`` with ``variable`` bound."""
    return partial(list_era5, variable=variable)
# Maps the short variable codes accepted by the op to the variable names used
# in the request.
VARS = {
    "msl": "mean_sea_level_pressure",
    "2t": "2m_temperature",
    "2d": "2m_dewpoint_temperature",
    "100u": "100m_u_component_of_wind",
    "10u": "10m_u_component_of_wind",
    "ssrd": "surface_solar_radiation_downwards",
    "100v": "100m_v_component_of_wind",
    "10v": "10m_v_component_of_wind",
    "tp": "total_precipitation",
    "sst": "sea_surface_temperature",
    "sp": "surface_pressure",
}


def list_era5(input_item: DataVibe, variable: str) -> Dict[str, List[Era5Product]]:
    """Describe the monthly ERA5 request covering the years of the input item.

    A single ``Era5Product`` is returned. It carries the request (all twelve
    months of every year between the start and end year of the input time
    range) under ``cds_request``, and its id is the SHA-256 digest of the
    dataset name followed by the string form of the request.

    Raises:
        ValueError: If ``variable`` is not one of the codes in ``VARS``.
    """
    # Only the ERA5 variables that we also have on PC are listed for now, as
    # monthly aggregates instead of hourly data. This should speed up
    # statistics computation (in addition to saving these assets in our
    # cache). The much richer set of variables available on CDS (all ERA5
    # variables, wildfire reanalysis, etc.) may be added later.
    if variable not in VARS:
        raise ValueError(
            f"Requested variable '{variable}' not valid. "
            f"Valid values are {', '.join(VARS)}"
        )

    first_year = input_item.time_range[0].year
    last_year = input_item.time_range[1].year

    dataset = "reanalysis-era5-single-levels-monthly-means"
    # Key order matters: the string form of this dict is hashed into the id.
    request = {
        "format": "netcdf",
        "variable": [VARS[variable]],
        "product_type": "monthly_averaged_reanalysis",
        "time": "00:00",
        "month": [f"{month:02d}" for month in range(1, 13)],
        "year": [str(year) for year in range(first_year, last_year + 1)],
    }
    request_digest = hashlib.sha256((dataset + str(request)).encode()).hexdigest()

    # NOTE(review): both datetimes are naive and the end is midnight at the
    # start of Dec 31 -- confirm this is the intended range.
    product = Era5Product(
        id=request_digest,
        time_range=(datetime(first_year, 1, 1), datetime(last_year, 12, 31)),
        # The product is given a geometry that spans the whole globe.
        geometry=shpg.mapping(shpg.box(-180, -90, 180, 90)),
        assets=[],
        item_id="",
        var=VARS[variable],
        cds_request={dataset: request},
    )

    return {"era5_products": [product]}


def callback_builder(variable: str):
    """Return ``list_era5`` with ``variable`` bound."""
    return partial(list_era5, variable=variable)
def convert_product(item: Dict[str, Any]) -> EsriLandUseLandCoverProduct:
    """Build an ``EsriLandUseLandCoverProduct`` from an item dictionary.

    The time range is taken from the item's ``start_datetime`` and
    ``end_datetime`` properties; id and geometry are copied from the item and
    no assets are attached.
    """
    properties = item["properties"]
    time_range = (
        isoparse(properties["start_datetime"]),
        isoparse(properties["end_datetime"]),
    )
    return EsriLandUseLandCoverProduct(
        id=str(item["id"]),
        time_range=time_range,
        geometry=item["geometry"],
        assets=[],
    )


def list_products(input_item: DataVibe) -> Dict[str, List[EsriLandUseLandCoverProduct]]:
    """List the land use/land cover products for the input item.

    The collection is queried with the bounds of the input geometry and the
    input time range, and every returned item is converted.

    Raises:
        RuntimeError: If the query returns no items.
    """
    collection = EsriLandUseLandCoverCollection()
    roi = cast(BBox, shape(input_item.geometry).bounds)
    found = collection.query(roi=roi, time_range=input_item.time_range)
    products = [convert_product(entry.to_dict()) for entry in found]

    if products:
        return {"listed_products": products}

    raise RuntimeError(
        f"No product found for time range {input_item.time_range} "
        f"and geometry {input_item.geometry}"
    )


def callback_builder():
    """Return the op callback; this op takes no parameters."""
    return list_products
a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml new file mode 100644 index 00000000..489a4baa --- /dev/null +++ b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.yaml @@ -0,0 +1,11 @@ +name: list_esri_landuse_landcover +inputs: + input_item: DataVibe +output: + listed_products: List[EsriLandUseLandCoverProduct] +parameters: +entrypoint: + file: list_esri_landuse_landcover.py + callback_builder: callback_builder +description: + short_description: Lists ESRI 10m Land Use/Land Cover (9-class) tiles that intersect with input geometry and time range. \ No newline at end of file diff --git a/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py b/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py new file mode 100644 index 00000000..240b7714 --- /dev/null +++ b/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py @@ -0,0 +1,33 @@ +import os +from datetime import datetime, timezone +from typing import List, cast + +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import DataVibe +from vibe_core.data.products import EsriLandUseLandCoverProduct +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "list_esri_landuse_landcover.yaml" +) + + +def test_op(): + latitude = 42.21422 + longitude = -93.22890 + buffer = 0.001 + bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] + polygon: Polygon = box(*bbox, ccw=True) + start_date = datetime(year=2017, month=1, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2017, month=12, day=31, tzinfo=timezone.utc) + input_item = DataVibe("input_item", (start_date, end_date), mapping(polygon), []) + + output_data = OpTester(CONFIG_PATH).run(input_item=input_item) + + # Get op result + output_name = "listed_products" + assert output_name in output_data + output_product 
LOGGER = logging.getLogger(__name__)


def parse_poly(poly_str: str) -> shpg.Polygon:
    """Parse a space-separated string of numbers into a polygon.

    Consecutive values are paired and each pair is swapped before being used
    as a vertex, i.e. ``"a b c d"`` yields the vertices ``(b, a), (d, c)``.
    Presumably the input is ordered "lat lon" -- verify against the API.
    """
    values = poly_str.split(" ")
    vertices = [
        (float(second), float(first)) for first, second in zip(values[::2], values[1::2])
    ]
    return shpg.Polygon(vertices)


def convert_product(item: Dict[str, Any]) -> GEDIProduct:
    """Build a ``GEDIProduct`` from an item returned by the EarthData API.

    Every polygon string of the item is parsed; several polygons are combined
    into a multipolygon. The processing level is the first key of
    ``EarthDataAPI.concept_ids`` whose value equals the item's collection
    concept id.

    Raises:
        RuntimeError: If the item has no polygons, or if its concept id does
            not correspond to any known processing level.
    """
    polygons = [parse_poly(ring) for group in item["polygons"] for ring in group]
    product_id = item["producer_granule_id"]
    if not polygons:
        raise RuntimeError(f"Failed to parse geometry from GEDI Product {product_id}")
    footprint = shpg.MultiPolygon(polygons) if len(polygons) > 1 else polygons[0]

    time_range = (parse_date(item["time_start"]), parse_date(item["time_end"]))
    orbits = item["orbit_calculated_spatial_domains"][0]
    concept_id = item["collection_concept_id"]

    # Reverse lookup: concept id -> processing level name.
    processing_level = next(
        (level for level, known_id in EarthDataAPI.concept_ids.items() if known_id == concept_id),
        None,
    )
    if processing_level is None:
        raise RuntimeError(f"Failed to parse concept id {concept_id} from product {product_id}")

    return GEDIProduct(
        id=product_id,
        geometry=shpg.mapping(footprint),
        time_range=time_range,
        product_name=product_id,
        start_orbit=int(orbits["start_orbit_number"]),
        stop_orbit=int(orbits["stop_orbit_number"]),
        processing_level=processing_level,
        assets=[],
    )


def callback_builder(processing_level: str):
    """Validate ``processing_level`` and return the op callback.

    Raises:
        ValueError: If ``processing_level`` is not a key of
            ``EarthDataAPI.concept_ids``.
    """
    if processing_level not in EarthDataAPI.concept_ids:
        valid_levels = ", ".join(f"'{i}'" for i in EarthDataAPI.concept_ids)
        raise ValueError(f"Parameters processing_level must be one of {valid_levels}")

    def callback(input_data: DataVibe) -> Dict[str, List[GEDIProduct]]:
        """Query the API for the input geometry and time range and convert the items."""
        api = EarthDataAPI(processing_level)
        geom = shpg.shape(input_data.geometry)
        time_range = input_data.time_range
        # The local names are part of the message because of the `=` specifier.
        LOGGER.info(
            f"Querying EarthData API for {processing_level=}, "
            f"geometry={shpg.mapping(geom)}, {time_range=}"
        )
        items = api.query(geometry=geom, time_range=time_range)
        if not items:
            raise RuntimeError(
                f"Query returned no items for time range {time_range} "
                f"and geometry {shpg.mapping(geom)}"
            )
        LOGGER.info(f"EarthData API returned {len(items)} items. Converting to DataVibe")
        return {"gedi_products": [convert_product(entry) for entry in items]}

    return callback
\ No newline at end of file diff --git a/ops/list_gedi_products/mock_items.json b/ops/list_gedi_products/mock_items.json new file mode 100644 index 00000000..1027a149 --- /dev/null +++ b/ops/list_gedi_products/mock_items.json @@ -0,0 +1 @@ +[{"producer_granule_id": "GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5", "time_start": "2021-01-03T02:28:16.000Z", "updated": "2021-09-16T13:33:58.248Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11669", "stop_orbit_number": "11669"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479671297", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-03T04:01:09.000Z", "id": "G2109010485-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "147.575", "browse_flag": true, "polygons": [["-30.3626576 -91.2648483 -27.6439792 -88.3897615 -24.8347156 -85.6623114 -21.9673825 -83.0671991 -19.0514493 -80.5838395 -16.095074 -78.1946169 -13.1049797 -75.8827098 -10.0913178 -73.6294082 -7.0568958 -71.422786 -4.0091158 -69.2472455 -0.9533188 -67.0897964 -0.2950898 -66.6261359 -0.2331909 -66.6864572 -0.8909761 -67.1498522 -3.9464484 -69.307554 -6.9941031 -71.4829217 -10.0280734 -73.6896102 -13.0417733 -75.9426575 -16.0305779 -78.2549303 -18.9859929 -80.6442171 -21.9008479 -83.1275515 -24.7669563 -85.7225227 -27.5748973 -88.4497367 -30.3318958 -91.3028519 -30.3626576 -91.2648483"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.03/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.03/GEDI02_B_2021003022816_O11669_01_T07098_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5", "time_start": 
"2021-01-04T14:04:38.000Z", "updated": "2021-09-15T14:44:45.312Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11692", "stop_orbit_number": "11692"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479801384", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-04T15:37:31.000Z", "id": "G2109313701-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "226.445", "browse_flag": true, "polygons": [["0.5505065 -81.5625675 -2.5086918 -79.4085405 -5.5611308 -77.2421343 -8.6029156 -75.0510247 -11.6259659 -72.8201955 -14.6277405 -70.5371334 -17.5991496 -68.1844306 -20.5338042 -65.7461622 -23.423009 -63.2043847 -26.2586328 -60.540614 -29.0303387 -57.7342285 -31.7271874 -54.7628274 -34.1212741 -51.86561 -34.0555803 -51.8563839 -31.6748997 -54.679901 -28.9801891 -57.6519074 -26.2103994 -60.4590336 -23.3764628 -63.1236187 -20.4887803 -65.6664175 -17.5554686 -68.1055773 -14.5850585 -70.4589025 -11.5839693 -72.7421956 -8.5619262 -74.9742452 -5.5203527 -77.1653332 -2.4687335 -79.332593 0.5910624 -81.4855825 0.5505065 -81.5625675"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.04/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.25/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.04/GEDI02_B_2021004140438_O11692_04_T06050_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5", "time_start": 
"2021-01-07T00:54:22.000Z", "updated": "2021-09-16T13:37:31.816Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11730", "stop_orbit_number": "11730"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479633671", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-07T02:27:15.000Z", "id": "G2108920939-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "126.573", "browse_flag": true, "polygons": [["-30.1461259 -90.9640426 -27.4215507 -88.102234 -24.606821 -85.3862242 -21.7348391 -82.8002922 -18.814933 -80.3247498 -15.8556248 -77.9418952 -12.8638124 -75.6347507 -9.8470872 -73.3867106 -6.8113281 -71.1833118 -3.7627547 -69.0099236 -0.7068341 -66.8531798 -0.2296719 -66.5171417 -0.168137 -66.5766535 -0.6451851 -66.9127594 -3.700955 -69.0695501 -6.7492633 -71.2429883 -9.7844186 -73.4465855 -12.8005944 -75.6946778 -15.7910674 -78.0022638 -18.7494616 -80.3851673 -21.6684271 -82.8606086 -24.5391962 -85.4464038 -27.3524956 -88.1622766 -30.1154458 -91.0020509 -30.1461259 -90.9640426"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.07/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.07/GEDI02_B_2021007005422_O11730_01_T09944_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5", "time_start": 
"2021-01-08T12:30:45.000Z", "updated": "2021-09-15T14:44:51.018Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11753", "stop_orbit_number": "11753"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479642220", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-08T14:03:38.000Z", "id": "G2108936022-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "228.481", "browse_flag": true, "polygons": [["0.3636339 -81.368489 -2.6948222 -79.2131182 -5.7477117 -77.0464724 -8.7890118 -74.8544509 -11.8139481 -72.6232694 -14.8152193 -70.3379173 -17.7870662 -67.983062 -20.7213548 -65.5421638 -23.6120414 -62.9982185 -26.445649 -60.3281273 -29.2154254 -57.5144296 -31.9098845 -54.5345779 -34.3245843 -51.5987621 -34.2589016 -51.5895276 -31.8573652 -54.4517723 -29.1654292 -57.4318291 -26.3975764 -60.2462847 -23.5658728 -62.9177241 -20.6765899 -65.4622016 -17.7433957 -67.9039735 -14.7727599 -70.259492 -11.7722016 -72.5454193 -8.7483977 -74.7775267 -5.7075006 -76.9700498 -2.6550172 -79.1369356 0.4037688 -81.291898 0.3636339 -81.368489"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.08/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.08/GEDI02_B_2021008123045_O11753_04_T08896_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}, {"producer_granule_id": "GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5", "time_start": 
"2021-01-10T23:20:25.000Z", "updated": "2021-09-16T13:48:44.353Z", "orbit_calculated_spatial_domains": [{"start_orbit_number": "11791", "stop_orbit_number": "11791"}], "dataset_id": "GEDI L2B Canopy Cover and Vertical Profile Metrics Data Global Footprint Level V002", "data_center": "LPDAAC_ECS", "title": "SC:GEDI02_B.002:2479659128", "coordinate_system": "GEODETIC", "day_night_flag": "UNSPECIFIED", "time_end": "2021-01-11T00:53:18.000Z", "id": "G2108978487-LPDAAC_ECS", "original_format": "ECHO10", "granule_size": "323.92", "browse_flag": true, "polygons": [["-30.1241135 -90.8142166 -27.3988696 -87.9532901 -24.5834775 -85.2382504 -21.7107944 -82.6531393 -18.7904635 -80.1781036 -15.8303809 -77.7956122 -12.8376188 -75.4880686 -9.8199741 -73.2400657 -6.7832843 -71.0364172 -3.7340327 -68.8628696 -0.6778065 -66.7054872 -0.1361488 -66.3240182 -0.0740696 -66.3841249 -0.6157131 -66.765595 -3.6717973 -68.9230187 -6.7209337 -71.0962995 -9.7571196 -73.300345 -12.7745146 -75.5482063 -15.7660982 -77.856108 -18.7253112 -80.238563 -21.644579 -82.7135771 -24.5159226 -85.298641 -27.3301816 -88.0133733 -30.0934383 -90.8522218 -30.1241135 -90.8142166"]], "collection_concept_id": "C1908350066-LPDAAC_ECS", "online_access_flag": true, "links": [{"rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "type": "application/x-hdfeos", "title": "GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5. 
MimeType: application/x-hdfeos", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.10/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "type": "text/html", "title": "The Landing Page for this file may be accessed directly from this link (DOI)", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001 "}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/browse#", "type": "image/jpeg", "title": "This Browse file may be downloaded directly from this link (BROWSE)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//WORKING/BRWS/Browse.001/2021.08.24/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.png"}, {"rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "type": "text/xml", "title": "This Metadata file may be downloaded directly from this link (EXTENDED METADATA)", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_B.002/2021.01.10/GEDI02_B_2021010232025_O11791_01_T07251_02_003_01_V002.h5.xml"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://search.earthdata.nasa.gov/search?q=C1908350066-LPDAAC_ECS"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/data#", "hreflang": "en-US", "href": "https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.002/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://doi.org/10.5067/GEDI/GEDI02_B.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/980/gedi_l2b_dictionary_P003_v2.html"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": 
"https://doi.org/10.5067/DOC/GEDI/GEDI_WF_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_WFGEO_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://doi.org/10.5067/DOC/GEDI/GEDI_FCCVPM_ATBD.001"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/metadata#", "hreflang": "en-US", "href": "https://gedi.umd.edu/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/accessing-and-analyzing-gedi-lidar-data-for-vegetation-studies/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/documents/989/GEDI_Quick_Guide_V2.pdf"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://lpdaac.usgs.gov/resources/e-learning/getting-started-gedi-l2b-version-2-data-python/"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-r/browse"}, {"inherited": true, "rel": "http://esipfed.org/ns/fedsearch/1.1/documentation#", "hreflang": "en-US", "href": "https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-finder-tutorial-python/browse"}]}] \ No newline at end of file diff --git a/ops/list_gedi_products/test_list_gedi_products.py 
b/ops/list_gedi_products/test_list_gedi_products.py new file mode 100644 index 00000000..fdaaa9ae --- /dev/null +++ b/ops/list_gedi_products/test_list_gedi_products.py @@ -0,0 +1,45 @@ +import json +import os +from datetime import datetime +from typing import Any, Dict, List, cast +from unittest.mock import Mock, patch + +import pytest +from dateutil.parser import parse as parse_date +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, GEDIProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.earthdata import EarthDataAPI + +HERE = os.path.dirname(os.path.abspath(__file__)) +CONFIG_PATH = os.path.join(HERE, "list_gedi_products.yaml") + + +@pytest.fixture +def mock_items(): + with open(os.path.join(HERE, "mock_items.json")) as f: + return json.load(f) + + +def compare_product_with_json(product: GEDIProduct, item: Dict[str, Any]): + assert product.product_name == item["producer_granule_id"] + assert isinstance(shpg.shape(product.geometry), shpg.Polygon) + assert product.time_range[0] == parse_date(item["time_start"]) + assert product.start_orbit == int( + item["orbit_calculated_spatial_domains"][0]["start_orbit_number"] + ) + + +@patch.object(EarthDataAPI, "query") +def test_op(query: Mock, mock_items: List[Dict[str, Any]]): + query.return_value = mock_items + now = datetime.now() + geom = shpg.box(0, 0, 1, 1) + x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) + out = OpTester(CONFIG_PATH).run(input_data=x) + assert "gedi_products" in out + products = cast(List[GEDIProduct], out["gedi_products"]) + assert len(products) == 5 + for p, i in zip(products, mock_items): + compare_product_with_json(p, i) diff --git a/ops/list_glad_products/list_glad_products.py b/ops/list_glad_products/list_glad_products.py new file mode 100644 index 00000000..6820fd77 --- /dev/null +++ b/ops/list_glad_products/list_glad_products.py @@ -0,0 +1,42 @@ +import hashlib +import itertools +from datetime import datetime 
+from typing import Dict, List, cast + +import geopandas as gpd + +from vibe_core.data import DataVibe, GLADProduct +from vibe_lib import glad + + +class CallbackBuilder: + def __init__(self, tile_geometry: str): + self.tiles_gdf: gpd.GeoDataFrame = cast(gpd.GeoDataFrame, gpd.read_file(tile_geometry)) + + def __call__(self): + def list_glad_products(input_item: DataVibe) -> Dict[str, List[GLADProduct]]: + geom_tiles = glad.intersecting_tiles(self.tiles_gdf, input_item.geometry) + years_range = range(input_item.time_range[0].year, input_item.time_range[1].year + 1) + intersection_years = itertools.product(geom_tiles, years_range) + + out_glad_products = [ + GLADProduct.clone_from( + input_item, + id=hashlib.sha256((f"glad-product-{tile_name}-{year}").encode()).hexdigest(), + assets=[], + time_range=(datetime(year, 1, 1), datetime(year, 12, 31)), + geometry=glad.get_tile_geometry(self.tiles_gdf, tile_name), + url=glad.GLAD_DOWNLOAD_URL.format(year=year, tile_name=tile_name), + ) + for tile_name, year in intersection_years + if glad.check_glad_for_year(tile_name, year) + ] + if len(out_glad_products) == 0: + raise RuntimeError( + f"No Glad products found for time range {input_item.time_range}" + f" and geometry {input_item.geometry}" + ) + + return {"glad_products": out_glad_products} + + return list_glad_products diff --git a/ops/list_glad_products/list_glad_products.yaml b/ops/list_glad_products/list_glad_products.yaml new file mode 100644 index 00000000..4037cda0 --- /dev/null +++ b/ops/list_glad_products/list_glad_products.yaml @@ -0,0 +1,18 @@ +name: list_glad_products +inputs: + input_item: DataVibe +output: + glad_products: List[GLADProduct] +parameters: + tile_geometry: /opt/terravibes/ops/resources/glad_tile_geometry/10d_tiles.geojson +entrypoint: + file: list_glad_products.py + callback_builder: CallbackBuilder +description: + short_description: Lists Global Land Analysis (GLAD) forest products that intersect the user-provided geometry/time range. 
+ long_description: | + Lists forest products from The Global Land Analysis and Discovery (GLAD) + laboratory in the Department of Geographical Sciences at the University of + Maryland. This op lists the 10x10 degree tiles that intersect the user geometry + for each year in the user-provided time range (if the tile is available for + that year). \ No newline at end of file diff --git a/ops/list_glad_products/test_glad_list.py b/ops/list_glad_products/test_glad_list.py new file mode 100644 index 00000000..4e98fdba --- /dev/null +++ b/ops/list_glad_products/test_glad_list.py @@ -0,0 +1,169 @@ +import itertools +import os +from datetime import datetime +from typing import List, Tuple, cast +from unittest.mock import Mock, patch + +import pytest + +from vibe_core import file_downloader +from vibe_core.data import DataVibe +from vibe_core.data.products import GLADProduct +from vibe_core.utils import ensure_list +from vibe_dev.testing.op_tester import OpTester + +VALID_GLAD_YEARS = [2000, 2020] +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_glad_products.yaml") +TILES_MAPPING = { + "northwest": ["50N_110W", "50N_120W", "60N_110W", "60N_120W"], + "northeast": ["50N_060E", "50N_070E", "60N_060E", "60N_070E"], + "southwest": ["10S_060W", "10S_070W", "20S_060W", "20S_070W"], + "southeast": ["10S_010E", "00N_010E", "10S_020E", "00N_020E"], +} + +MOCK_TILES = { + "50N_110W", + "50N_120W", + "60N_110W", + "60N_120W", + "50N_060E", + "50N_070E", + "60N_060E", + "60N_070E", + "10S_060W", + "10S_070W", + "20S_060W", + "20S_070W", + "10S_010E", + "00N_010E", + "10S_020E", + "00N_020E", +} + + +def custom_datavibe( + coordinates: List[List[float]], + time_range: Tuple[datetime, datetime] = (datetime(2000, 1, 1), datetime(2023, 1, 1)), +) -> DataVibe: + return DataVibe( + id=str("test_id"), + time_range=time_range, + geometry={ + "type": "Polygon", + "coordinates": [coordinates], + }, + assets=[], + ) + + +TEST_DATAVIBES = { + "northwest": 
custom_datavibe( + [ + [-115.0, 55.0], + [-105.0, 55.0], + [-105.0, 45.0], + [-115.0, 45.0], + ] + ), + "northeast": custom_datavibe( + [ + [75.0, 55.0], + [65.0, 55.0], + [65.0, 45.0], + [75.0, 45.0], + ] + ), + "southwest": custom_datavibe( + [ + [-65.0, -15.0], + [-55.0, -15.0], + [-55.0, -25.0], + [-65.0, -25.0], + ] + ), + "southeast": custom_datavibe( + [ + [15.0, -5.0], + [25.0, -5.0], + [25.0, -15.0], + [15.0, -15.0], + ] + ), +} + + +def mock_verify(url: str): + # URLs are of the form: + # https://glad.umd.edu/users/Potapov/GLCLUC2020/Forest_extent_2000/00N_000E.tif + return url[-12:-4] in MOCK_TILES and int(url[-17:-13]) in VALID_GLAD_YEARS + + +@patch.object(file_downloader, "verify_url") +@pytest.mark.parametrize( + "test_datavibe, expected_tiles", + [ + (TEST_DATAVIBES["northwest"], TILES_MAPPING["northwest"]), + (TEST_DATAVIBES["northeast"], TILES_MAPPING["northeast"]), + (TEST_DATAVIBES["southwest"], TILES_MAPPING["southwest"]), + (TEST_DATAVIBES["southeast"], TILES_MAPPING["southeast"]), + ], +) +def test_glad_list(verify: Mock, test_datavibe: DataVibe, expected_tiles: List[str]): + verify.side_effect = mock_verify + op = OpTester(CONFIG_PATH) + output_data = op.run(**{"input_item": test_datavibe}) + assert output_data + assert "glad_products" in output_data + + products: List[GLADProduct] = cast(List[GLADProduct], ensure_list(output_data["glad_products"])) + expected_combinations = set(itertools.product(expected_tiles, VALID_GLAD_YEARS)) + + actual_combinations = set((p.tile_name, p.time_range[0].year) for p in products) + + assert expected_combinations == actual_combinations + verify.reset_mock() + + +@patch.object(file_downloader, "verify_url") +def test_glad_list_same_tiles(verify: Mock): + verify.side_effect = mock_verify + + # Create datavibe_1 + test_data_vibe_1 = custom_datavibe( + [ + [15.0, -5.0], + [15.1, -5.0], + [15.1, -5.1 + 0.1], # not the same geom + [15.0, -5.1], + ], + time_range=(datetime(2020, 1, 1), datetime(2020, 1, 1)), + ) + 
+ test_data_vibe_2 = custom_datavibe( + [ + [15.0, -5.0], + [15.1, -5.0], + [15.1, -5.1], + [15.0, -5.1], + ], + time_range=(datetime(2020, 1, 1), datetime(2020, 1, 1)), + ) + + op = OpTester(CONFIG_PATH) + output_1 = op.run(**{"input_item": test_data_vibe_1}) + output_2 = op.run(**{"input_item": test_data_vibe_2}) + + products: List[GLADProduct] = [] + for output in [output_1, output_2]: + assert output + assert "glad_products" in output + assert isinstance(output["glad_products"], list) + assert len(output["glad_products"]) > 0 + + products.append(cast(GLADProduct, output["glad_products"][0])) + + assert products[0].id == products[1].id + assert products[0].time_range == products[1].time_range + assert products[0].geometry == products[1].geometry + assert products[0].assets == products[1].assets + assert products[0].url == products[1].url diff --git a/ops/list_gnatsgo_products/list_gnatsgo_products.py b/ops/list_gnatsgo_products/list_gnatsgo_products.py new file mode 100644 index 00000000..a11c26d3 --- /dev/null +++ b/ops/list_gnatsgo_products/list_gnatsgo_products.py @@ -0,0 +1,34 @@ +from typing import Dict, List + +from pystac import Item + +from vibe_core.data import DataVibe, GNATSGOProduct +from vibe_lib.planetary_computer import GNATSGOCollection + + +def convert_product(item: Item) -> GNATSGOProduct: + assert item.geometry is not None, "Input item has no geometry" + assert item.datetime is not None, "Input item has no datetime" + + output = GNATSGOProduct( + id=item.id, + time_range=(item.datetime, item.datetime), + geometry=item.geometry, + assets=[], + ) + return output + + +def callback_builder(): + def callback(input_item: DataVibe) -> Dict[str, List[GNATSGOProduct]]: + collection = GNATSGOCollection() + items = collection.query(roi=input_item.bbox) + products = [convert_product(item) for item in items] + if not products: + raise RuntimeError( + f"No product found for geometry {input_item.geometry}. 
" + f"Please, make sure the geometry is within Continental USA" + ) + return {"gnatsgo_products": products} + + return callback diff --git a/ops/list_gnatsgo_products/list_gnatsgo_products.yaml b/ops/list_gnatsgo_products/list_gnatsgo_products.yaml new file mode 100644 index 00000000..a0231a99 --- /dev/null +++ b/ops/list_gnatsgo_products/list_gnatsgo_products.yaml @@ -0,0 +1,12 @@ +name: list_gnatsgo_products +inputs: + input_item: DataVibe +output: + gnatsgo_products: List[GNATSGOProduct] +parameters: +entrypoint: + file: list_gnatsgo_products.py + callback_builder: callback_builder +description: + short_description: + Lists gNATSGO products from Planetary Computer that intersect with input geometry. diff --git a/ops/list_gnatsgo_products/test_list_gnatsgo_products.py b/ops/list_gnatsgo_products/test_list_gnatsgo_products.py new file mode 100644 index 00000000..eb3e276c --- /dev/null +++ b/ops/list_gnatsgo_products/test_list_gnatsgo_products.py @@ -0,0 +1,61 @@ +import os +from datetime import datetime, timezone +from typing import List, cast +from unittest.mock import MagicMock, patch + +import pytest +from pystac import Asset, Item +from shapely.geometry import Point, mapping + +from vibe_core.data import DataVibe, GNATSGOProduct +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import GNATSGOCollection + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_gnatsgo_products.yaml") + +VALID_GEOMETRY = Point(-92.99900, 42.03580).buffer(0.1, cap_style=3) +INVALID_GEOMETRY = Point(-47.06966, -22.81709).buffer(0.1, cap_style=3) +FAKE_DATE = datetime(year=2020, month=7, day=1, tzinfo=timezone.utc) + + +def fake_items(): + assets = {f"{var}": Asset(href=f"fake_href_{var}") for var in GNATSGOCollection.asset_keys} + return [ + Item( + id="fake_id", # type: ignore + geometry=mapping(VALID_GEOMETRY), + bbox=VALID_GEOMETRY.bounds, # type: ignore + datetime=FAKE_DATE, + properties={}, + assets=assets, + ) + ] + + 
+@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["gnatsgo-rasters"]) +@patch.object(GNATSGOCollection, "query") +def test_op(query: MagicMock, _: MagicMock): + query.return_value = fake_items() + + input_item = DataVibe("input_item", (FAKE_DATE, FAKE_DATE), VALID_GEOMETRY, []) # type: ignore + + op_tester = OpTester(CONFIG_PATH) + out = op_tester.run(input_item=input_item) + + assert query.call_args.kwargs["roi"] == VALID_GEOMETRY.bounds + + assert "gnatsgo_products" in out + products = cast(List[GNATSGOProduct], out["gnatsgo_products"]) + assert isinstance(products, list) + assert len(products) == 1 + + +@patch("vibe_lib.planetary_computer.get_available_collections", return_value=["gnatsgo-rasters"]) +@patch.object(GNATSGOCollection, "query") +def test_op_fails_invalid_geometry(query: MagicMock, _: MagicMock): + query.return_value = [] + input_item = DataVibe("input_item", (FAKE_DATE, FAKE_DATE), mapping(INVALID_GEOMETRY), []) + + op_tester = OpTester(CONFIG_PATH) + with pytest.raises(RuntimeError): + op_tester.run(input_item=input_item) diff --git a/ops/list_hansen_products/list_hansen_products.py b/ops/list_hansen_products/list_hansen_products.py new file mode 100644 index 00000000..5bc296ad --- /dev/null +++ b/ops/list_hansen_products/list_hansen_products.py @@ -0,0 +1,97 @@ +import hashlib +from datetime import datetime +from typing import Dict, List, cast +from urllib.parse import urljoin + +import geopandas as gpd + +from vibe_core.data import DataVibe, HansenProduct +from vibe_core.file_downloader import verify_url +from vibe_lib import glad + +DATASET_START_YEAR = 2000 + + +class CallbackBuilder: + def __init__( + self, + layer_name: str, + tile_geometry: str, + tiles_folder_url: str, + ): + self.layer_name = layer_name + self.tiles_gdf: gpd.GeoDataFrame = cast(gpd.GeoDataFrame, gpd.read_file(tile_geometry)) + # Base urls are expected to be in the format: + # 
'https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/' + self.tiles_folder_url = tiles_folder_url + + # Make sure folder url ends with a slash + self.tiles_folder_url = ( + self.tiles_folder_url + if self.tiles_folder_url.endswith("/") + else f"{self.tiles_folder_url}/" + ) + + self.final_year = HansenProduct.extract_last_year(self.tiles_folder_url) + self.version = HansenProduct.extract_version(self.tiles_folder_url) + + # Create an asset template for the products, this will be used to check if the tif files are + # compatible to 'https://storage.googleapis.com/.../Hansen_GFC-2022-v1.10_50N_000E.tif' + template = f"Hansen_GFC-{self.final_year}-{self.version}_{{asset_key}}_{{tile_name}}.tif" + self.asset_template = urljoin(self.tiles_folder_url, template) + + def is_product_available(self, layer_name: str, tile_name: str) -> bool: + return verify_url(self.asset_template.format(asset_key=layer_name, tile_name=tile_name)) + + def validate_time_range(self, input_item: DataVibe): + start_year = input_item.time_range[0].year + if start_year != DATASET_START_YEAR: + raise ValueError( + f"Start year must be {DATASET_START_YEAR} for Hansen dataset " + f"version {self.version}-{self.final_year}, received {start_year}" + ) + + end_year = input_item.time_range[1].year + if end_year > self.final_year: + raise ValueError( + f"End year must be <= {self.final_year} for Hansen dataset " + f"version {self.version}-{self.final_year}, received {end_year}" + ) + + def __call__(self): + def list_hansen_products(input_item: DataVibe) -> Dict[str, List[HansenProduct]]: + self.validate_time_range(input_item) + geom_tiles = glad.intersecting_tiles(self.tiles_gdf, input_item.geometry) + + first_year = input_item.time_range[0].year + last_year = input_item.time_range[1].year + + out_hansen_products = [ + HansenProduct.clone_from( + input_item, + id=hashlib.sha256( + ( + f"hansen-product-{self.layer_name}-{tile_name}" + f"{first_year}-{last_year}-{self.version}" + 
).encode() + ).hexdigest(), + assets=[], + time_range=(datetime(first_year, 1, 1), datetime(last_year, 12, 31)), + geometry=glad.get_tile_geometry(self.tiles_gdf, tile_name), + asset_url=self.asset_template.format( + asset_key=self.layer_name, tile_name=tile_name + ), + ) + for tile_name in geom_tiles + if self.is_product_available(self.layer_name, tile_name) + ] + + if len(out_hansen_products) == 0: + raise RuntimeError( + f"No Hansen products found for time range {input_item.time_range}" + f" and geometry {input_item.geometry}" + ) + + return {"hansen_products": out_hansen_products} + + return list_hansen_products diff --git a/ops/list_hansen_products/list_hansen_products.yaml b/ops/list_hansen_products/list_hansen_products.yaml new file mode 100644 index 00000000..7d6ca9b6 --- /dev/null +++ b/ops/list_hansen_products/list_hansen_products.yaml @@ -0,0 +1,40 @@ +name: list_hansen_products +inputs: + input_item: DataVibe +output: + hansen_products: List[HansenProduct] +parameters: + layer_name: + tile_geometry: /opt/terravibes/ops/resources/glad_tile_geometry/10d_tiles.geojson + tiles_folder_url: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ +entrypoint: + file: list_hansen_products.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - layer_name +description: + short_description: Lists Global Forest Change (Hansen) products that intersect the user-provided geometry/time range. + long_description: + The op will list Global Forest Change (Hansen) products that intersect the + user-provided geometry/time range. The dataset is available at 30m + resolution and is updated annually. The data contains information on forest + cover, loss, and gain. Full dataset details can be found at + https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. + sources: + input_item: User-provided geometry and time range. 
+ sinks: + hansen_products: List of Global Forest Change (Hansen) products that intersect the user-provided geometry/time range. + parameters: + tiles_folder_url: + URL to the Global Forest Change (Hansen) dataset. It specifies the dataset + version and is used to download the data. The default value is + https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ + with the dataset version GFC-2022-v1.10. + layer_name: + Name of the Global Forest Change (Hansen) layer. Can be any of the following names + 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. + tile_geometry: + Path to the GeoJSON file containing the tile geometries. It is used to filter + the tiles that intersect the user-provided geometry/time range. As the Hansen dataset + uses the same tiling system as the GLAD dataset, the default value is the GLAD tile geometry. \ No newline at end of file diff --git a/ops/list_hansen_products/test_hansen_list.py b/ops/list_hansen_products/test_hansen_list.py new file mode 100644 index 00000000..b7367616 --- /dev/null +++ b/ops/list_hansen_products/test_hansen_list.py @@ -0,0 +1,143 @@ +import itertools +import os +from datetime import datetime +from typing import List +from unittest.mock import Mock, patch + +import pytest + +from vibe_core import file_downloader +from vibe_core.data import DataVibe +from vibe_core.data.products import HansenProduct +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_hansen_products.yaml") +DEFAULT_DATASET_FINAL_YEAR = 2022 +DEFAULT_DATASET_FOLDER = "https://storage.googleapis.com/earthenginepartners-hansen/" +DEFAULT_DATASET_VERSION = "v1.10" + +EXPECTED_TILES = { + "northwest": ["50N_110W", "50N_120W", "60N_110W", "60N_120W"], + "northeast": ["50N_060E", "50N_070E", "60N_060E", "60N_070E"], + "southwest": ["10S_060W", "10S_070W", "20S_060W", "20S_070W"], + "southeast": ["10S_010E", "00N_010E", "10S_020E", 
"00N_020E"], +} + +MOCK_TILES = set([tile_name for tile_list in EXPECTED_TILES.values() for tile_name in tile_list]) + + +def create_fake_datavibe(coordinates: List[List[float]]) -> DataVibe: + return DataVibe( + id=str("test_id"), + time_range=(datetime(2000, 1, 1), datetime(2022, 1, 1)), + geometry={ + "type": "Polygon", + "coordinates": [coordinates], + }, + assets=[], + ) + + +MOCK_INPUT_DICT = { + "northwest": create_fake_datavibe( + [ + [-115.0, 55.0], + [-105.0, 55.0], + [-105.0, 45.0], + [-115.0, 45.0], + ] + ), + "northeast": create_fake_datavibe( + [ + [75.0, 55.0], + [65.0, 55.0], + [65.0, 45.0], + [75.0, 45.0], + ] + ), + "southwest": create_fake_datavibe( + [ + [-65.0, -15.0], + [-55.0, -15.0], + [-55.0, -25.0], + [-65.0, -25.0], + ] + ), + "southeast": create_fake_datavibe( + [ + [15.0, -5.0], + [25.0, -5.0], + [25.0, -15.0], + [15.0, -15.0], + ] + ), +} + + +@patch.object(file_downloader, "verify_url") +@pytest.mark.parametrize( + "test_datavibe, expected_tiles, layer_name", + [ + (MOCK_INPUT_DICT[location], EXPECTED_TILES[location], asset_key) + for location, asset_key in itertools.product( + ["northwest", "northeast", "southwest", "southeast"], HansenProduct.asset_keys + ) + ], +) +def test_hansen_list( + verify: Mock, test_datavibe: DataVibe, expected_tiles: List[str], layer_name: str +): + # URLs are of the form: + # https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/Hansen_GFC-2022-v1.10_treecover2000_20N_090W.tif + def mock_verify(url: str): + return ( + url[-12:-4] in MOCK_TILES + and int(url.split("/")[-2].split("-")[1]) == DEFAULT_DATASET_FINAL_YEAR + ) + + verify.side_effect = mock_verify + op = OpTester(CONFIG_PATH) + op.update_parameters({"layer_name": layer_name}) + + output_data = op.run(input_item=test_datavibe) + assert output_data + assert "hansen_products" in output_data + + tiles = set([product.tile_name for product in output_data["hansen_products"]]) # type: ignore + assert all( + [ + product.layer_name == 
layer_name + for product in output_data["hansen_products"] # type: ignore + ] + ) + assert tiles == set(expected_tiles), f"Expected {expected_tiles}, got {tiles}" + assert all( + [ + product.last_year == DEFAULT_DATASET_FINAL_YEAR + for product in output_data["hansen_products"] # type: ignore + ] + ) + assert all( + [ + product.version == DEFAULT_DATASET_VERSION + for product in output_data["hansen_products"] # type: ignore + ] + ) + + for product in output_data["hansen_products"]: # type: ignore + expected_url = ( + f"{DEFAULT_DATASET_FOLDER}Hansen_GFC-2022-v1.10_{layer_name}_{product.tile_name}.tif" + ) + assert set(product.asset_url) == set(expected_url) + + +def test_hansen_invalid_years(): + op = OpTester(CONFIG_PATH) + test_datavibe = MOCK_INPUT_DICT["northwest"] + test_datavibe.time_range = (datetime(1999, 1, 1), datetime(2022, 1, 1)) + with pytest.raises(ValueError): + op.run(input_item=test_datavibe) + + test_datavibe.time_range = (datetime(2000, 1, 1), datetime(2023, 1, 1)) + with pytest.raises(ValueError): + op.run(input_item=test_datavibe) diff --git a/ops/list_herbie/list_herbie.py b/ops/list_herbie/list_herbie.py new file mode 100644 index 00000000..4509a70e --- /dev/null +++ b/ops/list_herbie/list_herbie.py @@ -0,0 +1,98 @@ +import hashlib +from datetime import datetime +from typing import Dict, List, Optional + +import numpy as np +import pandas as pd +from herbie import Herbie_latest + +from vibe_core.data import DataVibe, HerbieProduct + +N = 6 # latest file within the last N*frequecy hours + + +class CallbackBuilder: + def __init__( + self, + model: str, + product: str, + frequency: int, + search_text: str, + forecast_lead_times: Optional[List[int]] = None, + forecast_start_date: Optional[str] = None, + ): + if forecast_lead_times is not None and forecast_start_date is not None: + raise ValueError( + "You cannot specify 'forecast_lead_times' and" + " 'forecast_start_date' at the same time." 
+ ) + self.model = model + self.product = product + self.frequency = frequency + self.forecast_lead_times = forecast_lead_times + self.search_text = search_text + self.forecast_start_date = forecast_start_date + + def _get_list(self, input_item: DataVibe): + start = input_item.time_range[0].replace(tzinfo=None) + end = input_item.time_range[1].replace(tzinfo=None) + if self.forecast_lead_times is None: + if self.forecast_start_date is None: + H = Herbie_latest(n=N, freq=f"{self.frequency}H", model=self.model) + latest = H.date.to_pydatetime() + else: + latest = datetime.strptime(self.forecast_start_date, "%Y-%m-%d %H:%M") + if end > latest or self.forecast_start_date is not None: + plist = [(t, 0) for t in pd.date_range(start, latest, freq=f"{self.frequency}H")] + r = len(pd.date_range(start, end, freq=f"{self.frequency}H")) + last = plist[-1][0] + plist += [ + (last, int(lead)) + for lead in (np.arange(1, r - len(plist) + 1) * self.frequency) + ] + else: + plist = [(t, 0) for t in pd.date_range(start, end, freq=f"{self.frequency}H")] + else: + plist = [ + (t, lead) + for t in pd.date_range(start, end, freq=f"{self.frequency}H") + for lead in range( + self.forecast_lead_times[0], + self.forecast_lead_times[1], + self.forecast_lead_times[2], + ) + ] + + return plist + + def __call__(self): + def list_herbie( + input_item: DataVibe, + ) -> Dict[str, List[HerbieProduct]]: + plist = self._get_list(input_item) + + products = [ + HerbieProduct.clone_from( + input_item, + hashlib.sha256( + ( + f"{self.model}-{self.product}-" + f"{lead}-{self.search_text}-" + f"{str(input_item.geometry)}-{str(t)}" + ).encode() + ).hexdigest(), + assets=[], + time_range=( + t.tz_localize(input_item.time_range[0].tzinfo), + t.tz_localize(input_item.time_range[0].tzinfo), + ), + model=self.model, + product=self.product, + lead_time_hours=lead, + search_text=self.search_text, + ) + for t, lead in plist + ] + return {"product": products} + + return list_herbie diff --git 
a/ops/list_herbie/list_herbie.yaml b/ops/list_herbie/list_herbie.yaml new file mode 100644 index 00000000..c94a96a2 --- /dev/null +++ b/ops/list_herbie/list_herbie.yaml @@ -0,0 +1,52 @@ +name: list_herbie +inputs: + input_item: DataVibe +output: + product: List[HerbieProduct] +parameters: + model: "hrrr" + product: "prs" + frequency: 1 + forecast_lead_times: + forecast_start_date: + search_text: "TMP:2 m" +entrypoint: + callback_builder: CallbackBuilder + file: list_herbie.py +dependencies: + parameters: + - model + - product + - frequency + - forecast_lead_times + - search_text +description: + short_description: + Lists herbie products. + parameters: + model: + Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types + 'hrrr' HRRR contiguous United States model + 'hrrrak' HRRR Alaska model (alias 'alaska') + 'rap' RAP model + 'gfs' Global Forecast System (atmosphere) + 'gfs_wave' Global Forecast System (wave) + 'rrfs' Rapid Refresh Forecast System prototype + for more information see https://herbie.readthedocs.io/en/latest/user_guide/model_info.html + product: + Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), + subh (subhourly fields)). Not specifying this will use the first product in model template file. + frequency: frequency in hours of the forecast + forecast_lead_times: + Forecast lead time in the format [start_time, end_time, increment] (in hours). If this parameter is + None, then this op lists analysis (zero lead time) up to the latest analysis available, and from + that point it lists forecasts with progressively increasing lead times. + forecast_start_date: + latest datetime (in the format "%Y-%m-%d %H:%M") for which analysis (zero lead time) are listed. + After this datetime, this op lists forecasts with progressively increasing lead times. This parameter + must be set to None if 'forecast_lead_times' is used. 
+ search_text: + It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer + of the file required instead of complete file. + For more information on search_text refer to below url. + https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html \ No newline at end of file diff --git a/ops/list_landsat_products_pc/list_landsat_pc.py b/ops/list_landsat_products_pc/list_landsat_pc.py new file mode 100644 index 00000000..40483448 --- /dev/null +++ b/ops/list_landsat_products_pc/list_landsat_pc.py @@ -0,0 +1,38 @@ +from typing import Any, Dict, List + +from dateutil.parser import isoparse + +from vibe_core.data import DataVibe, LandsatProduct +from vibe_lib.planetary_computer import LandsatCollection + + +def convert_product(item: Dict[str, Any]) -> LandsatProduct: + date = isoparse(item["properties"]["datetime"]) + output = LandsatProduct( + id=str(item["id"]), + time_range=(date, date), + geometry=item["geometry"], + assets=[], + tile_id=str(item["id"]), + ) + + return output + + +def callback_builder(): + def list_landsat_products( + input_item: DataVibe, + ) -> Dict[str, List[LandsatProduct]]: + collection = LandsatCollection() + items = collection.query(roi=input_item.bbox, time_range=input_item.time_range) + + products = [convert_product(item.to_dict()) for item in items] + + if not products: + raise RuntimeError( + f"No product found for time range {input_item.time_range} " + f"and geometry {input_item.geometry}" + ) + return {"landsat_products": products} + + return list_landsat_products diff --git a/ops/list_landsat_products_pc/list_landsat_products_pc.yaml b/ops/list_landsat_products_pc/list_landsat_products_pc.yaml new file mode 100644 index 00000000..f441f7ac --- /dev/null +++ b/ops/list_landsat_products_pc/list_landsat_products_pc.yaml @@ -0,0 +1,11 @@ +name: list_landsat_products_pc +inputs: + input_item: DataVibe +output: + landsat_products: List[LandsatProduct] +parameters: +entrypoint: + file: 
list_landsat_pc.py + callback_builder: callback_builder +description: + short_description: Lists LANDSAT tiles that intersect with the input geometry and time range. \ No newline at end of file diff --git a/ops/list_modis_sr/list_modis_sr.py b/ops/list_modis_sr/list_modis_sr.py new file mode 100644 index 00000000..2611299f --- /dev/null +++ b/ops/list_modis_sr/list_modis_sr.py @@ -0,0 +1,34 @@ +from typing import Dict, List + +from dateutil.parser import parse +from pystac import Item +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, ModisProduct +from vibe_lib.planetary_computer import Modis8DaySRCollection + + +def convert_product(item: Item, resolution: int) -> ModisProduct: + time_range = tuple(parse(item.properties[k]) for k in ("start_datetime", "end_datetime")) + assert item.geometry is not None, f"Item {item.id} is missing geometry field" + return ModisProduct( + id=item.id, geometry=item.geometry, time_range=time_range, assets=[], resolution=resolution + ) + + +def callback_builder(resolution: int): + available_res = Modis8DaySRCollection.collections.keys() + if resolution not in available_res: + raise ValueError(f"Valid resolutions are {available_res}, got {resolution}.") + + def callback(input_data: List[DataVibe]) -> Dict[str, List[ModisProduct]]: + collection = Modis8DaySRCollection(resolution) + items: Dict[str, Item] = {} + for input_datum in input_data: + input_geom = shpg.shape(input_datum.geometry) + datum_items = collection.query(geometry=input_geom, time_range=input_datum.time_range) + for i in datum_items: + items[i.id] = i + return {"modis_products": [convert_product(i, resolution) for i in items.values()]} + + return callback diff --git a/ops/list_modis_sr/list_modis_sr.yaml b/ops/list_modis_sr/list_modis_sr.yaml new file mode 100644 index 00000000..fa1b7956 --- /dev/null +++ b/ops/list_modis_sr/list_modis_sr.yaml @@ -0,0 +1,17 @@ +name: list_modis_sr +inputs: + input_data: List[DataVibe] +output: + modis_products: 
List[ModisProduct] +parameters: + resolution: 250 +dependencies: + parameters: + - resolution +entrypoint: + file: list_modis_sr.py + callback_builder: callback_builder +description: + short_description: + Lists MODIS 8-day surface reflectance rasters intersecting + with the input geometry and time range for desired resolution. \ No newline at end of file diff --git a/ops/list_modis_vegetation/list_modis_vegetation.py b/ops/list_modis_vegetation/list_modis_vegetation.py new file mode 100644 index 00000000..91c9ddf4 --- /dev/null +++ b/ops/list_modis_vegetation/list_modis_vegetation.py @@ -0,0 +1,34 @@ +from typing import Dict, List + +from dateutil.parser import parse +from pystac import Item +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, ModisProduct +from vibe_lib.planetary_computer import Modis16DayVICollection + + +def convert_product(item: Item, resolution: int) -> ModisProduct: + time_range = tuple(parse(item.properties[k]) for k in ("start_datetime", "end_datetime")) + assert item.geometry is not None, f"Item {item.id} is missing geometry field" + return ModisProduct( + id=item.id, geometry=item.geometry, time_range=time_range, assets=[], resolution=resolution + ) + + +def callback_builder(resolution: int): + available_res = Modis16DayVICollection.collections.keys() + if resolution not in available_res: + raise ValueError(f"Valid resolutions are {available_res}, got {resolution}.") + + def callback(input_data: List[DataVibe]) -> Dict[str, List[ModisProduct]]: + collection = Modis16DayVICollection(resolution) + items: Dict[str, Item] = {} + for input_datum in input_data: + input_geom = shpg.shape(input_datum.geometry) + datum_items = collection.query(geometry=input_geom, time_range=input_datum.time_range) + for i in datum_items: + items[i.id] = i + return {"modis_products": [convert_product(i, resolution) for i in items.values()]} + + return callback diff --git a/ops/list_modis_vegetation/list_modis_vegetation.yaml 
b/ops/list_modis_vegetation/list_modis_vegetation.yaml new file mode 100644 index 00000000..d10af781 --- /dev/null +++ b/ops/list_modis_vegetation/list_modis_vegetation.yaml @@ -0,0 +1,15 @@ +name: list_modis_vegetation +inputs: + input_data: List[DataVibe] +output: + modis_products: List[ModisProduct] +parameters: + resolution: 250 +dependencies: + parameters: + - resolution +entrypoint: + file: list_modis_vegetation.py + callback_builder: callback_builder +description: + short_description: Lists MODIS vegetation products for input geometry, time range and resolution. \ No newline at end of file diff --git a/ops/list_modis_vegetation/test_list_modis_vegetation.py b/ops/list_modis_vegetation/test_list_modis_vegetation.py new file mode 100644 index 00000000..23bff6f4 --- /dev/null +++ b/ops/list_modis_vegetation/test_list_modis_vegetation.py @@ -0,0 +1,73 @@ +import os +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from pystac import Item +from shapely import geometry as shpg + +from vibe_core.data import DataVibe +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import Modis16DayVICollection + +HERE = os.path.dirname(os.path.abspath(__file__)) + +FAKE_TIME_RANGE = (datetime(2020, 11, 1), datetime(2020, 11, 2)) +FAKE_GEOM = shpg.mapping(shpg.box(0, 0, 2, 2)) +INVALID_RESOLUTION = 100 + + +def fake_items(resolution: int): + return [ + Item( + id=f"{resolution}m-id", # type: ignore + geometry=FAKE_GEOM, + bbox=None, + datetime=None, + properties={ + "start_datetime": FAKE_TIME_RANGE[0].isoformat() + "Z", + "end_datetime": FAKE_TIME_RANGE[1].isoformat() + "Z", + }, + ) + ] + + +@pytest.mark.parametrize("resolution", (250, 500)) +@patch("vibe_lib.planetary_computer.get_available_collections") +@patch.object(Modis16DayVICollection, "query") +def test_op(query: MagicMock, get_collections: MagicMock, resolution: int): + query.return_value = fake_items(resolution) + get_collections.return_value = 
list(Modis16DayVICollection.collections.values()) + + geom1 = shpg.Point(1, 1).buffer(0.1, cap_style=3) + geom2 = shpg.Point(2, 2).buffer(0.1, cap_style=3) + time_range = (datetime(2022, 11, 1), datetime(2022, 11, 16)) + x1 = DataVibe(id="1", time_range=time_range, geometry=shpg.mapping(geom1), assets=[]) + x2 = DataVibe(id="2", time_range=time_range, geometry=shpg.mapping(geom2), assets=[]) + op_tester = OpTester(os.path.join(HERE, "list_modis_vegetation.yaml")) + op_tester.update_parameters({"resolution": resolution}) + o1 = op_tester.run(input_data=[x1]) + query.assert_called_with(geometry=geom1, time_range=x1.time_range) + get_collections.assert_called_once() + o2 = op_tester.run(input_data=[x2]) + query.assert_called_with(geometry=geom2, time_range=x2.time_range) + assert get_collections.call_count == 2 + o3 = op_tester.run(input_data=[x1, x2]) + assert get_collections.call_count == 3 + assert query.call_count == 4 + products = o1["modis_products"] + assert isinstance(products, list) + assert len(products) == 1 + product = products[0] + assert isinstance(product, DataVibe) + assert product.id == f"{resolution}m-id" + assert product.time_range == tuple(t.astimezone() for t in FAKE_TIME_RANGE) + assert product.geometry == FAKE_GEOM + assert o1 == o2 == o3 + + +def test_op_fails_invalid_res(): + op_tester = OpTester(os.path.join(HERE, "list_modis_vegetation.yaml")) + op_tester.update_parameters({"resolution": INVALID_RESOLUTION}) + with pytest.raises(ValueError): + op_tester.run(input_data=[]) diff --git a/ops/list_naip_products/list_naip_products.py b/ops/list_naip_products/list_naip_products.py new file mode 100644 index 00000000..ac22fea0 --- /dev/null +++ b/ops/list_naip_products/list_naip_products.py @@ -0,0 +1,45 @@ +# This operator receives a region and a date range and obtains the respective +# NAIP items, returning a list of NaipProduct. 
+from typing import Any, Dict, List, Tuple, cast + +from dateutil.parser import isoparse +from shapely.geometry import shape + +from vibe_core.data import DataVibe, NaipProduct +from vibe_lib.planetary_computer import NaipCollection + + +def convert_product(item: Dict[str, Any]) -> NaipProduct: + date = isoparse(item["properties"]["datetime"]) + output = NaipProduct( + id=str(item["id"]), + time_range=(date, date), + geometry=item["geometry"], + assets=[], + tile_id=str(item["id"]), + resolution=float(item["properties"]["gsd"]), + year=int(item["properties"]["naip:year"]), + ) + + return output + + +def list_naip_products(input_item: DataVibe) -> Dict[str, List[NaipProduct]]: + collection = NaipCollection() + input_geometry = shape(input_item.geometry) + time_range = input_item.time_range + bbox = cast(Tuple[Any, Any, Any, Any], input_geometry.bounds) + items = collection.query(roi=bbox, time_range=time_range) + products = [convert_product(item.to_dict()) for item in items] + + if not products: + raise RuntimeError( + f"No product found for time range {input_item.time_range} " + f"and geometry {input_item.geometry}" + ) + + return {"naip_products": products} + + +def callback_builder(): + return list_naip_products diff --git a/ops/list_naip_products/list_naip_products.yaml b/ops/list_naip_products/list_naip_products.yaml new file mode 100644 index 00000000..876f168d --- /dev/null +++ b/ops/list_naip_products/list_naip_products.yaml @@ -0,0 +1,11 @@ +name: list_naip_products +inputs: + input_item: DataVibe +output: + naip_products: List[NaipProduct] +parameters: +entrypoint: + file: list_naip_products.py + callback_builder: callback_builder +description: + short_description: Lists Naip tiles that intersect with input geometry and time range. 
\ No newline at end of file diff --git a/ops/list_naip_products/test_list_naip_products.py b/ops/list_naip_products/test_list_naip_products.py new file mode 100644 index 00000000..3162638b --- /dev/null +++ b/ops/list_naip_products/test_list_naip_products.py @@ -0,0 +1,30 @@ +import os +from datetime import datetime, timezone +from typing import List, cast + +from shapely.geometry import Polygon, box, mapping + +from vibe_core.data import DataVibe, DemProduct +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "list_naip_products.yaml") + + +def test_op(): + latitude = 42.21422 + longitude = -93.22890 + buffer = 0.001 + bbox = [longitude - buffer, latitude - buffer, longitude + buffer, latitude + buffer] + polygon: Polygon = box(*bbox, ccw=True) + start_date = datetime(year=2018, month=2, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) + input_item = DataVibe("input_item", (start_date, end_date), mapping(polygon), []) + + output_data = OpTester(CONFIG_PATH).run(input_item=input_item) + + # Get op result + output_name = "naip_products" + assert output_name in output_data + output_product = output_data[output_name] + assert isinstance(output_product, list) + assert len(cast(List[DemProduct], output_data["naip_products"])) == 1 diff --git a/ops/list_sentinel1_products/list_sentinel1_products_pc.py b/ops/list_sentinel1_products/list_sentinel1_products_pc.py new file mode 100644 index 00000000..d37b1ccb --- /dev/null +++ b/ops/list_sentinel1_products/list_sentinel1_products_pc.py @@ -0,0 +1,43 @@ +import logging +from typing import Dict, List + +import planetary_computer as pc +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, Sentinel1Product +from vibe_lib.planetary_computer import ( + Sentinel1GRDCollection, + Sentinel1RTCCollection, + convert_to_s1_product, +) + +LOGGER = logging.getLogger(__name__) +COLLECTIONS = 
{"grd": Sentinel1GRDCollection, "rtc": Sentinel1RTCCollection} + + +def callback_builder(pc_key: str, collection: str): + collection = collection.lower() + if collection not in COLLECTIONS: + col_names = ", ".join(f"'{c}'" for c in COLLECTIONS) + raise ValueError( + f"Invalid Sentinel-1 collection '{collection}', expected one of {col_names}" + ) + + def list_sentinel1_products(input_item: DataVibe) -> Dict[str, List[Sentinel1Product]]: + pc.set_subscription_key(pc_key) + + input_range = input_item.time_range + input_geom = shpg.shape(input_item.geometry) + + col = COLLECTIONS[collection]() + items = col.query(geometry=input_geom, time_range=input_range) + LOGGER.debug(f"Planetary Computer query returned {len(items)} STAC items") + products = [convert_to_s1_product(item) for item in items] + if not products: + raise RuntimeError( + f"No product found for time range {input_range} " + f"and geometry {input_item.geometry}" + ) + return {"sentinel_products": products} + + return list_sentinel1_products diff --git a/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml b/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml new file mode 100644 index 00000000..d1c3137c --- /dev/null +++ b/ops/list_sentinel1_products/list_sentinel1_products_pc.yaml @@ -0,0 +1,26 @@ +# List sentinel 1 products from the Planetary Computer. +name: list_sentinel_1_products_pc +inputs: + input_item: DataVibe +output: + sentinel_products: List[Sentinel1Product] +parameters: + pc_key: + collection: RTC +entrypoint: + file: list_sentinel1_products_pc.py + callback_builder: callback_builder +description: + short_description: List Sentinel-1 GRD or RTC products given geometry and time range. + long_description: + The op will query the Planetary Computer for all products that intersect with the input geometry + and are in the input time range. + inputs: + input_item: Input geometry and time range. + output: + sentinel_products: Sentinel-1 products available.
+ parameters: + pc_key: Planetary Computer API key. +dependencies: + parameters: + - collection diff --git a/ops/list_sentinel1_products/sample_pc_output.json b/ops/list_sentinel1_products/sample_pc_output.json new file mode 100644 index 00000000..94392656 --- /dev/null +++ b/ops/list_sentinel1_products/sample_pc_output.json @@ -0,0 +1 @@ +[{"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "properties": {"datetime": "2020-05-08T14:13:07.609978Z", "platform": "SENTINEL-1B", "s1:shape": [25505, 20178], "end_datetime": "2020-05-08 14:13:22.734955+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "167133", "start_datetime": "2020-05-08 14:12:52.485002+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "7", "s1:total_slices": "7", "sar:looks_range": 5, "sat:orbit_state": "descending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21491, "sat:relative_orbit": 115, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, "sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-119.138582, 47.4179232], [-119.6290381, 47.4820619], [-119.9631034, 47.5244576], [-120.4712379, 47.586948], [-120.8119799, 47.6275103], [-121.3015743, 47.6839415], [-121.6379534, 47.721425], [-121.9638494, 47.7567655], [-122.4156312, 47.8041106], [-122.4564718, 47.624352], [-122.4992538, 47.4447036], [-122.538563, 47.2646975], [-122.6222922, 46.9052187], [-122.6659226, 46.7256663], [-122.7032974, 46.5454623], [-122.7460242, 46.3658217], [-122.7881959, 46.1861214], 
[-122.8040405, 46.1164173], [-122.3681793, 46.0690417], [-122.0458206, 46.0328638], [-121.5577475, 45.9762723], [-121.2321514, 45.9372979], [-120.7495731, 45.8777187], [-120.4216433, 45.8360035], [-120.0941551, 45.7933433], [-119.6196888, 45.7297424], [-119.5988331, 45.7992956], [-119.5475278, 45.9790858], [-119.5025768, 46.159743], [-119.4464383, 46.3389037], [-119.3933146, 46.5184848], [-119.3443344, 46.6986317], [-119.2915808, 46.8782849], [-119.2417411, 47.0583384], [-119.138582, 47.4179232]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/measurement/iw-vh.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, 
"vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: vertical transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. 
Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": 
"https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/8/IW/DV/S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD_C1D0/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200508T141252_20200508T141322_021491_028CDD&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-122.80404053, 45.72974239, -119.13858201, 47.80411064], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", 
"https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}, {"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "properties": {"datetime": "2020-05-05T01:53:10.759983Z", "platform": "SENTINEL-1B", "s1:shape": [25998, 16696], "end_datetime": "2020-05-05 01:53:23.259048+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "166729", "start_datetime": "2020-05-05 01:52:58.260917+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "12", "s1:total_slices": "19", "sar:looks_range": 5, "sat:orbit_state": "ascending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21440, "sat:relative_orbit": 64, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, "sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-121.6939507, 45.4834836], [-121.2754269, 45.5398515], [-120.949968, 45.5825238], [-120.4593687, 45.6449796], [-120.1313604, 45.6854882], [-119.8026649, 45.7250812], [-119.3082196, 45.7827593], [-118.975129, 45.8203419], [-118.5406649, 45.8678048], [-118.5868532, 46.0469466], [-118.6229127, 46.2272559], [-118.6621848, 46.4072244], [-118.7004307, 46.5872996], [-118.7817933, 46.9469381], [-118.8214057, 47.1268714], [-118.8617592, 47.3067269], [-118.8773102, 47.3659107], [-119.3232525, 47.3187693], [-119.6604197, 47.281891], [-120.0036295, 47.2432888], [-120.4923616, 47.1864189], [-120.8394691, 47.1447147], [-121.3351084, 47.083203], [-121.678541, 47.0392535], [-122.1324173, 46.9794471], [-122.1136801, 
46.9202853], [-122.0548751, 46.7414745], [-122.0097466, 46.5608568], [-121.9460919, 46.3827264], [-121.898986, 46.2023906], [-121.8458561, 46.0228714], [-121.8037581, 45.8418801], [-121.7493476, 45.6625688], [-121.6939507, 45.4834836]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/measurement/iw-vh.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, "vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: 
vertical transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. 
Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": 
"https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/5/IW/DV/S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49_9AFC/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200505T015258_20200505T015323_021440_028B49&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-122.13241725, 45.48348356, -118.54066489, 47.3659107], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", 
"https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}, {"type": "Feature", "stac_version": "1.0.0", "id": "S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "properties": {"datetime": "2020-05-03T14:04:57.359203Z", "platform": "SENTINEL-1B", "s1:shape": [25653, 17974], "end_datetime": "2020-05-03 14:05:10.836563+00:00", "constellation": "Sentinel-1", "s1:resolution": "high", "s1:datatake_id": "166547", "start_datetime": "2020-05-03 14:04:43.881843+00:00", "s1:orbit_source": "RESORB", "s1:slice_number": "10", "s1:total_slices": "10", "sar:looks_range": 5, "sat:orbit_state": "descending", "sar:product_type": "GRD", "sar:looks_azimuth": 1, "sar:polarizations": ["VV", "VH"], "sar:frequency_band": "C", "sat:absolute_orbit": 21418, "sat:relative_orbit": 42, "s1:processing_level": "1", "sar:instrument_mode": "IW", "sar:center_frequency": 5.405, "sar:resolution_range": 20, "s1:product_timeliness": "Fast-24h", "sar:resolution_azimuth": 22, "sar:pixel_spacing_range": 10, "sar:observation_direction": "right", "sar:pixel_spacing_azimuth": 10, "sar:looks_equivalent_number": 4.4, "s1:instrument_configuration_ID": "1", "sat:platform_international_designator": "2016-025A"}, "geometry": {"type": "Polygon", "coordinates": [[[-117.1764847, 47.1004535], [-117.6650564, 47.1646324], [-117.9970043, 47.2069601], [-118.3302859, 47.2484249], [-118.8303154, 47.3087035], [-119.3312506, 47.3667752], [-119.6699184, 47.4047223], [-120.0065821, 47.4414051], [-120.435373, 47.4865971], [-120.4887182, 47.3081555], [-120.5207375, 47.1274403], [-120.5578334, 46.9472389], [-120.5998539, 46.7675515], [-120.6408996, 46.5877646], [-120.6790306, 46.4076721], [-120.7273938, 46.2286521], [-120.7722768, 46.0492732], [-120.7820398, 45.9964776], [-120.3608571, 45.9506475], [-120.0272661, 45.9131756], [-119.6976008, 45.8751337], [-119.2090772, 45.8169084], [-118.8874917, 45.7773694], [-118.4078398, 45.7166092], [-117.9232097, 45.653044], [-117.6113535, 
45.6109694], [-117.592719, 45.6630222], [-117.5411059, 45.8427277], [-117.4955474, 46.0232608], [-117.4441308, 46.2030132], [-117.384136, 46.381627], [-117.3291147, 46.5609233], [-117.2799694, 46.7410179], [-117.1764847, 47.1004535]]]}, "links": [{"rel": "collection", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "parent", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd", "type": "application/json"}, {"rel": "root", "href": "https://planetarycomputer.microsoft.com/api/stac/v1", "type": "application/json", "title": "Microsoft Planetary Computer STAC API"}, {"rel": "self", "href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "type": "application/geo+json"}, {"rel": "license", "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice"}, {"rel": "preview", "href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93", "type": "text/html", "title": "Map of item"}], "assets": {"vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/measurement/iw-vh.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VH: vertical transmit, horizontal receive", "description": "Amplitude of signal transmitted with vertical polarization and received with horizontal polarization with radiometric terrain correction applied.", "roles": ["data"]}, "vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/measurement/iw-vv.tiff", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "VV: vertical 
transmit, vertical receive", "description": "Amplitude of signal transmitted with vertical polarization and received with vertical polarization with radiometric terrain correction applied.", "roles": ["data"]}, "thumbnail": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/preview/quick-look.png", "type": "image/png", "title": "Preview Image", "description": "An averaged, decimated preview image in PNG format. Single polarisation products are represented with a grey scale image. Dual polarisation products are represented by a single composite colour image in RGB with the red channel (R) representing the co-polarisation VV or HH), the green channel (G) represents the cross-polarisation (VH or HV) and the blue channel (B) represents the ratio of the cross an co-polarisations.", "roles": ["thumbnail"]}, "safe-manifest": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/manifest.safe", "type": "application/xml", "title": "Manifest File", "description": "General product metadata in XML format. 
Contains a high-level textual description of the product and references to all of product's components, the product metadata, including the product identification and the resource references, and references to the physical location of each component file contained in the product.", "roles": ["metadata"]}, "schema-noise-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/noise-iw-vh.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-noise-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/noise-iw-vv.xml", "type": "application/xml", "title": "Noise Schema", "description": "Estimated thermal noise look-up tables", "roles": ["metadata"]}, "schema-product-vh": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/iw-vh.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-product-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/iw-vv.xml", "type": "application/xml", "title": "Product Schema", "description": "Describes the main characteristics corresponding to the band: state of the platform during acquisition, image properties, Doppler information, geographic location, etc.", "roles": ["metadata"]}, "schema-calibration-vh": {"href": 
"https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/calibration-iw-vh.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "schema-calibration-vv": {"href": "https://sentinel1euwest.blob.core.windows.net/s1-grd/GRD/2020/5/3/IW/DV/S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93_3AA0/annotation/calibration/calibration-iw-vv.xml", "type": "application/xml", "title": "Calibration Schema", "description": "Calibration metadata including calibration information and the beta nought, sigma nought, gamma and digital number look-up tables that can be used for absolute product calibration.", "roles": ["metadata"]}, "tilejson": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "application/json", "title": "TileJSON with default rendering", "roles": ["tiles"]}, "rendered_preview": {"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1B_IW_GRDH_1SDV_20200503T140443_20200503T140510_021418_028A93&assets=vv&assets=vh&expression=vv%2Cvh%2Cvv%2Fvh&rescale=0%2C500&rescale=0%2C300&rescale=0%2C7&tile_format=png", "type": "image/png", "title": "Rendered preview", "rel": "preview", "roles": ["overview"]}}, "bbox": [-120.78203976, 45.61096937, -117.17648474, 47.48659707], "stac_extensions": ["https://stac-extensions.github.io/sar/v1.0.0/schema.json", "https://stac-extensions.github.io/sat/v1.0.0/schema.json", 
"https://stac-extensions.github.io/eo/v1.0.0/schema.json"], "collection": "sentinel-1-grd"}] \ No newline at end of file diff --git a/ops/list_sentinel1_products/test_list_sentinel1.py b/ops/list_sentinel1_products/test_list_sentinel1.py new file mode 100644 index 00000000..d0c2c9c8 --- /dev/null +++ b/ops/list_sentinel1_products/test_list_sentinel1.py @@ -0,0 +1,99 @@ +import json +import os +from datetime import datetime, timezone +from typing import List +from unittest.mock import Mock, patch + +import pytest +from pystac import Item +from shapely import geometry as shpg + +from vibe_core.data import DataVibe, Sentinel1Product +from vibe_dev.testing.op_tester import OpTester +from vibe_lib.planetary_computer import Sentinel1GRDCollection, Sentinel1RTCCollection + +HERE = os.path.dirname(os.path.abspath(__file__)) +CONFIG_PATH_PC = os.path.join(HERE, "list_sentinel1_products_pc.yaml") + + +@pytest.fixture +def fake_items_pc(): + filepath = os.path.join(HERE, "sample_pc_output.json") + with open(filepath) as f: + out = json.load(f) + return [Item.from_dict(i) for i in out] + + +@pytest.fixture +def input_data(): + polygon_coords = [ + (-118.8415739999999943, 46.7963099999999983), + (-118.6759440000000012, 46.7963099999999983), + (-118.6759440000000012, 46.9169079999999994), + (-118.8415739999999943, 46.9169079999999994), + (-118.8415739999999943, 46.7963099999999983), + ] + + geom = shpg.Polygon(polygon_coords) + start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) + return DataVibe("input_test_data", (start_date, end_date), shpg.mapping(geom), []) + + +def compare_product_with_stac(product: Sentinel1Product, stac_item: Item): + assert product.geometry == stac_item.geometry + assert product.id == stac_item.id + assert product.time_range[0] == stac_item.datetime + + +@patch("vibe_lib.planetary_computer.get_available_collections") +@patch.object(Sentinel1GRDCollection, "query") 
@patch("vibe_lib.planetary_computer.get_available_collections")
@patch.object(Sentinel1RTCCollection, "query")
def test_list_rtc(
    query: Mock, get_collections: Mock, fake_items_pc: List[Item], input_data: DataVibe
):
    """Run the listing op without parameter overrides against a mocked RTC collection.

    The collection query is patched to return the recorded STAC items, and the
    list of available collections is patched to contain only the RTC one.
    """
    get_collections.return_value = [Sentinel1RTCCollection.collection]
    query.return_value = fake_items_pc

    tester = OpTester(CONFIG_PATH_PC)
    result = tester.run(input_item=input_data)

    # The op must expose its products under this output key
    assert "sentinel_products" in result
    listed = result["sentinel_products"]
    assert isinstance(listed, list)
    assert len(listed) == 3

    # Both mocks are expected to be hit exactly once, with the input's geometry/time range
    get_collections.assert_called_once()
    query.assert_called_once_with(
        geometry=shpg.shape(input_data.geometry), time_range=input_data.time_range
    )

    # Products must come back in the same order as the STAC items they were built from
    for product, stac_item in zip(listed, fake_items_pc):
        assert isinstance(product, Sentinel1Product)
        compare_product_with_stac(product, stac_item)
def callback_builder(num_workers: int):
    """Build the callback that lists Sentinel-2 products for an input item.

    Args:
        num_workers: Maximum number of threads used to convert the queried
            items into products.

    Returns:
        A callable taking a ``DataVibe`` and returning the converted products
        under the ``"sentinel_products"`` key.
    """

    def list_sentinel_2_products(
        input_item: DataVibe,
    ) -> Dict[str, List[Sentinel2Product]]:
        stac_items = Sentinel2Collection().query(
            roi=input_item.bbox, time_range=input_item.time_range
        )

        # Conversion needs the absolute orbit from the SAFE file for every item,
        # which is slow; running it in a thread pool keeps it from becoming the
        # bottleneck of the op.
        with ThreadPoolExecutor(max_workers=num_workers) as pool:
            converted = list(pool.map(convert_to_s2_product, stac_items))

        if converted:
            return {"sentinel_products": converted}

        raise RuntimeError(
            f"No product found for time range {input_item.time_range} "
            f"and geometry {input_item.geometry}"
        )

    return list_sentinel_2_products
def callback_builder():
    """Build the callback that packs a list of rasters into one RasterSequence."""

    def callback(list_rasters: List[Raster]) -> Dict[str, RasterSequence]:
        # The first raster is the template the sequence is cloned from
        template = list_rasters[0]

        # Sequence id: SHA-256 hex digest of "sequence" followed by all raster ids, in order
        joined_ids = "".join(raster.id for raster in list_rasters)
        sequence_id = hashlib.sha256(f"sequence{joined_ids}".encode()).hexdigest()

        sequence = RasterSequence.clone_from(
            template,
            id=sequence_id,
            time_range=time_range_union(list_rasters),
            geometry=geometry_union(list_rasters),
            assets=[],
        )
        for raster in list_rasters:
            sequence.add_item(raster)

        return {"rasters_seq": sequence}

    return callback
def create_raster(geometry: Polygon, time_range: Tuple[datetime, datetime]) -> Raster:
    """Build a test Raster with fresh ids, one placeholder ``.tif`` asset and no bands."""
    raster_id = gen_guid()
    # The asset has an empty reference: only its presence and mime type are set here
    placeholder_asset = AssetVibe(
        reference="", type=mimetypes.types_map[".tif"], id=gen_guid()
    )
    return Raster(
        id=raster_id,
        time_range=time_range,
        geometry=mapping(geometry),
        assets=[placeholder_asset],
        bands={},
    )
@pytest.mark.parametrize(
    "input_time_range_list, expected_time_range",
    [
        ([TR_1900s, TR_2000s], TR_1900s_2000s),
        ([TR_1900s, TR_1990s], TR_1900s),
        ([FAKE_TIME_RANGE], FAKE_TIME_RANGE),
    ],
)
def test_time_range_combination(
    input_time_range_list: List[Tuple[datetime, datetime]],
    expected_time_range: Tuple[datetime, datetime],
):
    """The sequence's time range must span all of the input rasters' time ranges."""
    # Every raster shares the same geometry so that only the time range varies
    rasters = [create_raster(FAKE_GEOMETRY, tr) for tr in input_time_range_list]

    result = OpTester(CONFIG_PATH).run(list_rasters=rasters)  # type: ignore

    # Get op result
    assert "rasters_seq" in result
    sequence = result["rasters_seq"]
    assert type(sequence) is RasterSequence
    assert len(sequence.asset_time_range) == len(rasters)
    assert sequence.time_range == expected_time_range
class CallbackBuilder:
    """Callback factory for the op that matches a raster to a reference raster.

    The builder owns a temporary directory into which the resampled rasters are
    written; the directory is removed when the builder is garbage collected.
    """

    def __init__(self, resampling: str):
        # Output assets are written here for as long as the builder is alive
        self.tmp_dir = TemporaryDirectory()
        # `resampling` is looked up by name as an attribute of rasterio's Resampling enum
        self.resampling: Resampling = getattr(Resampling, resampling)

    def __call__(self):
        def operator_callback(raster: Raster, ref_raster: Raster) -> Dict[str, Raster]:
            matched = load_raster_match(
                raster, match_raster=ref_raster, resampling=self.resampling
            )
            out_assets = [save_raster_to_asset(matched, self.tmp_dir.name)]

            # Carry the visualization asset over when the input has one;
            # a missing one is only logged.
            try:
                vis_asset = raster.visualization_asset
            except ValueError as e:
                LOGGER.warning(f"Visualization asset not found {e}")
            else:
                out_assets.append(vis_asset)

            # The output keeps the input's metadata but takes the reference geometry
            return {
                "output_raster": Raster.clone_from(
                    src=raster,
                    id=gen_guid(),
                    geometry=ref_raster.geometry,
                    assets=out_assets,
                )
            }

        return operator_callback

    def __del__(self):
        self.tmp_dir.cleanup()
def write_tiff(
    x: NDArray[Any],
    tiff_file: str,
    ref_file: str,
    gdal_type: int = gdalconst.GDT_Float32,
    predictor: int = 3,
):
    """
    Write an array to a compressed, tiled GeoTIFF georeferenced like `ref_file`.

    USAGE: write_tiff(array, tiff_file, ref_file)
    Use predictor=3 for float types and predictor=2 for integer types.

    Args:
        x: 2-D array (rows, columns) or 3-D array (rows, columns, bands).
        tiff_file: Path of the GeoTIFF to create.
        ref_file: Existing raster whose projection and geotransform are copied
            to the output.
        gdal_type: GDAL data type of the output bands.
        predictor: Value of the GTiff ``PREDICTOR`` creation option.

    Raises:
        AssertionError: If `x` is neither 2-D nor 3-D.
        FileNotFoundError: If `ref_file` does not exist.
    """
    gtiff_flags = [
        "COMPRESS=ZSTD",  # also LZW and DEFLATE works well
        "ZSTD_LEVEL=9",  # should be between 1-22, and 22 is highest compression.
        # 9 is default and gets essentially the same compression-rate
        "PREDICTOR=%d" % predictor,  # default is 1, use 2 for ints, and 3 for floats
        "TILED=YES",  # so that we can read sub-arrays efficiently
        "BIGTIFF=YES",  # in case resulting file is >4GB
    ]

    assert x.ndim == 2 or x.ndim == 3
    if x.ndim == 3:
        nx, ny, nbands = x.shape
    else:
        nx, ny = x.shape
        nbands = 1

    if not os.path.exists(ref_file):
        raise FileNotFoundError("<%s> doesn't exist" % ref_file)
    ds = gdal.Open(ref_file)
    # A mismatch in EITHER dimension means the copied geotransform does not
    # describe the array, so warn in that case too (this used to require both
    # dimensions to differ). The file is still written, as before.
    if ds.RasterYSize != nx or ds.RasterXSize != ny:
        print("Size mismatch between reference file and input array")
        print("x: %s, ref_file: %d, %d" % (x.shape, ds.RasterYSize, ds.RasterXSize))

    outDrv = gdal.GetDriverByName("GTiff")
    # GDAL's Create takes (xsize, ysize), i.e. (columns, rows)
    out = outDrv.Create(tiff_file, ny, nx, nbands, gdal_type, gtiff_flags)
    out.SetProjection(ds.GetProjection())
    out.SetGeoTransform(ds.GetGeoTransform())
    if x.ndim == 3:
        for i in range(nbands):
            # GDAL band indices are 1-based
            out.GetRasterBand(i + 1).WriteArray(x[:, :, i])
    else:
        out.GetRasterBand(1).WriteArray(x)
    out.FlushCache()
    del out  # guarantee the flush
    del ds
def compute_missing_mask(s2_file: str, dilation: int = 1):
    """
    Compute a boolean mask of the pixels with no data in a Sentinel-2 raster.

    A pixel is flagged as missing when at least one of the bands read from
    `s2_file` is zero at that position. When `dilation` is greater than 1 and
    the mask has any missing pixel, the mask is grown with a disk of that radius.

    Args:
        s2_file: Path of the raster to read.
        dilation: Radius of the disk used to dilate the mask; values of 1 or
            less leave the mask untouched.

    Returns:
        2-D boolean array, True where data is missing.
    """
    # TCI is no longer explicitly stored
    # NOTE(review): indices are 0-based positions in the file (read_s2_bands
    # reads band b + 1); presumably these are the 10m bands - confirm.
    bands_10m = read_s2_bands(s2_file, [1, 2, 3, 7])

    # A dicey proposition, but it seems like 0 == NO_DATA in all bands.
    missing_mask = np.min(bands_10m, axis=0) == 0

    # Takes lots of memory, free up fast
    del bands_10m

    # Try hard to free it up
    gc.collect()

    # Grow the missing region, but only when there is something to grow.
    # The previous condition (`np.max(missing_mask) == 0`) ran the dilation
    # only on empty masks, so the `dilation` argument never had any effect.
    if dilation > 1 and np.any(missing_mask):
        selem = disk(dilation)
        missing_mask = binary_dilation(missing_mask, selem)

    return missing_mask
def shift_arr(
    cloud_probs: List[str],
    cloud_masks: List[str],
    T: int,
    w2: int,
    cm1_arr: List[NDArray[Any]],
    cm2_arr: List[NDArray[Any]],
    min_prob: float,
) -> Tuple[List[NDArray[Any]], List[NDArray[Any]]]:
    """
    USAGE: cm1_arr, cm2_arr = shift_arr(cloud_probs, cloud_masks, T, w2, cm1_arr, cm2_arr, min_prob)
    Advance the temporal window of cloud masks by one step: the first entry of
    cm1_arr and cm2_arr is dropped and the masks for index w2 of cloud_probs /
    cloud_masks are loaded and appended. Only the new entry is read, so masks
    already in the window are not loaded again. New lists are returned; the
    input lists are left untouched.
    """
    # Entries 1..2*T of the current window are kept
    kept = range(1, 2 * T + 1)
    shifted_cm1 = [cm1_arr[k] for k in kept]
    shifted_cm2 = [cm2_arr[k] for k in kept]

    incoming_cm1, incoming_cm2 = load_cloud_masks(cloud_probs[w2], cloud_masks[w2], min_prob)

    return shifted_cm1 + [incoming_cm1], shifted_cm2 + [incoming_cm2]
def cloud_masks_for_time_window(
    cloudless_files: List[str], mask_files: List[str], min_prob: float
) -> Tuple[List[NDArray[Any]], List[NDArray[Any]]]:
    """
    Populate temporal window of cloud masks.

    Each (probability file, mask file) pair is loaded with load_cloud_masks and
    the two results are collected in separate lists that follow the input order.
    """
    loaded_pairs = [
        load_cloud_masks(prob_path, mask_path, min_prob)
        for prob_path, mask_path in zip(cloudless_files, mask_files)
    ]

    cm1_arr: List[NDArray[Any]] = [first for first, _ in loaded_pairs]
    cm2_arr: List[NDArray[Any]] = [second for _, second in loaded_pairs]

    return cm1_arr, cm2_arr
def clean_clouds_for_tile(
    probs_files: List[str],
    mask_files: List[str],
    out_dir: str,
    T: int,
    min_prob: float,
    min_area: int,
    max_extra_cloud: int,
    dilation: int,
) -> List[str]:
    """
    USAGE: saved_masks = clean_clouds_for_tile(probs_files, mask_files, out_dir, T=10,
    min_prob=0.7, min_area=400, max_extra_cloud=5, dilation=1) reads the cloud
    probability and built in cloud mask files of a tile and cleans them based on
    two rules.
    1. If, within the time window of up to 2*T+1 dates, a pixel is cloudy in the
       thresholded probability mask but not in the built in cloud mask on more
       than max_extra_cloud dates, then we back off to the built in mask for
       that pixel.
    2. We remove connected cloud components with less than min_area pixels and
       fill in holes in clouds with less than min_area pixels.
    Finally we take the union of these cloud pixels and the built in cloud mask and
    write it to a new file in out_dir. The paths of the written files are returned
    in the same order as the input files.
    """

    # Window of cloud masks to process
    window_start = 0
    window_end = 2 * T + 1

    # Slicing clamps to the list length, so short series load every file at once
    selected_probs_files = probs_files[window_start:window_end]
    selected_mask_files = mask_files[window_start:window_end]

    cm1_arr, cm2_arr = cloud_masks_for_time_window(
        selected_probs_files, selected_mask_files, min_prob
    )

    N = len(probs_files)
    saved_masks: List[str] = []
    for i in range(N):
        # Slide the window by one date while there are still unread files.
        # NOTE(review): with `>` the first shift happens at i == T + 2, so the
        # current date sits at index T + 1 of the window rather than at its
        # centre; confirm whether `>=` was intended.
        if i + T > window_end and window_end < N:
            cm1_arr, cm2_arr = shift_arr(
                probs_files, mask_files, T, window_end, cm1_arr, cm2_arr, min_prob
            )
            gc.collect()
            window_start += 1
            window_end += 1
        # Position of date i inside the currently loaded window
        idx = i - window_start
        new_mask = compute_mask_with_missing_clouds(
            cm1_arr, cm2_arr, idx, max_extra_cloud, min_area, dilation
        )
        # The original mask file is used as the reference for the written raster
        saved_masks.append(fill_missing_pixels(mask_files[i], new_mask, out_dir))
        gc.collect()

    return saved_masks
class CallbackBuilder:
    """
    Builds the callback of the merge_cloud_masks op.

    The parameters are stored on the instance and handed to
    clean_clouds_for_tile for every tile. Output files are written to a
    temporary directory that is removed when the builder is deleted.
    """

    def __init__(
        self,
        num_workers: int,
        window_size: int,
        cloud_prob_threshold: float,
        min_area: int,
        max_extra_cloud: int,
        dilation: int,
    ):
        self.num_workers = num_workers
        self.tmp_dir = TemporaryDirectory()
        self.window_size = window_size
        self.threshold = cloud_prob_threshold
        self.min_area = min_area
        self.max_extra_cloud = max_extra_cloud
        self.dilation = dilation

    def __call__(self):
        def compute_cloud_prob(
            masks: List[Sentinel2CloudMask],
            cloud_probabilities: List[Sentinel2CloudProbability],
        ) -> Dict[str, List[Sentinel2CloudMask]]:
            def process_single_tile(items: TileData) -> List[Sentinel2CloudMask]:
                # Order by the start of the mask time range so that the cleaned
                # files line up with the items when zipped below
                ordered = sorted(items, key=lambda pair: pair[0].time_range[0])
                probs_files, mask_files = prepare_tile_data(ordered)

                cleaned_paths = clean_clouds_for_tile(
                    probs_files,
                    mask_files,
                    self.tmp_dir.name,
                    T=self.window_size,
                    min_prob=self.threshold,
                    min_area=self.min_area,
                    max_extra_cloud=self.max_extra_cloud,
                    dilation=self.dilation,
                )

                # One output item per input mask, pointing at the cleaned raster
                merged_items: List[Sentinel2CloudMask] = []
                for (source_mask, _), cleaned_path in zip(ordered, cleaned_paths):
                    merged_asset = AssetVibe(
                        reference=cleaned_path, type=mimetypes.types_map[".tif"], id=gen_guid()
                    )
                    merged_items.append(
                        Sentinel2CloudMask.clone_from(source_mask, gen_guid(), [merged_asset])
                    )

                return merged_items

            # Pair every mask with its cloud probability and group the pairs by tile
            tiles: Dict[str, TileData] = {}
            for mask in masks:
                tile_id = mask.tile_id
                pair = (mask, find_s2_product(mask.product_name, cloud_probabilities))
                tiles.setdefault(tile_id, []).append(pair)

            per_tile = [process_single_tile(tile_items) for tile_items in tiles.values()]

            return {"merged_cloud_masks": list(chain.from_iterable(per_tile))}

        return compute_cloud_prob

    def __del__(self):
        self.tmp_dir.cleanup()
def remove_small_components(cmask: NDArray[Any], min_area: int):
    """
    USAGE: new_mask = remove_small_components(cmask, min_area=400)
    First removes small connected cloud components, then fill in small
    connected holes in clouds to make for a smoother cloud mask.

    Works for boolean masks as well as integer masks that use 0 for clear sky.
    The input array is modified in place and also returned.
    """
    # Get cloud components
    cloud_comp = label(cmask)
    # Mark small components
    kill_list = kill_labels(cloud_comp, min_area)  # type: ignore
    small_clouds = np.isin(cloud_comp, kill_list)  # type: ignore
    # Remove them
    cmask[small_clouds] = False

    # Do the same for small components of clear sky. The clear-sky image is
    # built with a comparison rather than `~`: on an integer mask `~` is a
    # bitwise NOT that turns 0/1 into non-zero values (255/254 for uint8), so
    # no pixel would be background for the labelling.
    holes_comp = label(cmask == 0)
    kill_list = kill_labels(holes_comp, min_area)  # type: ignore
    small_cloud_holes = np.isin(holes_comp, kill_list)  # type: ignore
    cmask[small_cloud_holes] = True

    return cmask
cloud_prob_threshold: float, + shadow_prob_threshold: float, + closing_size: int, + min_area: int, + ): + self.tmp_dir = TemporaryDirectory() + self.cloud_threshold = cloud_prob_threshold + self.shadow_threshold = shadow_prob_threshold + self.closing_size = closing_size + self.min_area = min_area + + def __call__(self): + def compute_cloud_prob( + product_mask: Sentinel2CloudMask, + cloud_probability: Sentinel2CloudProbability, + shadow_probability: Sentinel2CloudProbability, + ) -> Dict[str, Sentinel2CloudMask]: + merged, meta = merge_masks( + product_mask, + cloud_probability, + shadow_probability, + self.cloud_threshold, + self.shadow_threshold, + self.closing_size, + self.min_area, + ) + id = gen_guid() + out_path = os.path.join(self.tmp_dir.name, f"{id}.tif") + with rasterio.open(out_path, "w", **meta, **INT_COMPRESSION_KWARGS) as dst: + dst.write(merged) + return { + "merged_cloud_mask": Sentinel2CloudMask.clone_from( + cloud_probability, + id=gen_guid(), + bands={"cloud": 0}, + categories=["Clear", "Cloud"], + assets=[AssetVibe(id=id, type="image/tiff", reference=out_path)], + ) + } + + return compute_cloud_prob + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml b/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml new file mode 100644 index 00000000..ca9eeecb --- /dev/null +++ b/ops/merge_cloud_masks/merge_cloud_masks_simple.yaml @@ -0,0 +1,23 @@ +name: merge_cloud_masks_simple +inputs: + product_mask: Sentinel2CloudMask + cloud_probability: Sentinel2CloudProbability + shadow_probability: Sentinel2CloudProbability +output: + merged_cloud_mask: Sentinel2CloudMask +parameters: + cloud_prob_threshold: 0.3 + shadow_prob_threshold: 0.2 + closing_size: 5 + min_area: 0 +entrypoint: + file: merge_cloud_masks_simple.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - cloud_prob_threshold + - shadow_prob_threshold + - closing_size + - min_area +description: + short_description: Merges 
def callback_builder(method: str):
    """
    Build the callback of the merge_geometries op.

    `method` must be the name of a MergeMethod member ('union' or
    'intersection'); any other value raises ValueError. The returned callback
    merges the geometries of its input items and returns, under the key
    "merged", a clone of the first item without assets and with the merged
    geometry. The id of the clone is a hash of the method and the input ids.
    """
    try:
        merge_method = MergeMethod[method]
    except KeyError as err:
        avail_methods = ", ".join(i.name for i in MergeMethod)
        # Chain the lookup failure so the traceback shows where it came from
        raise ValueError(
            f"Invalid merge method parameter {method}. Available methods are {avail_methods}"
        ) from err

    def callback(items: List[T]) -> Dict[str, T]:
        if not items:
            # Fail with a clear message instead of an IndexError further down
            raise ValueError("Cannot merge the geometries of an empty list of items")

        first_item = items[0]
        item_type = type(first_item)
        geoms = [shpg.shape(i.geometry) for i in items]

        if merge_method == MergeMethod.union:
            merged_shape = shpo.unary_union(geoms)
        else:
            # Start from the first geometry and intersect with the remaining
            # ones; intersecting the first geometry with itself is not needed
            merged_shape = geoms[0]
            for geom in geoms[1:]:
                merged_shape = merged_shape.intersection(geom)
        merge_geom = shpg.mapping(merged_shape)

        merge_id = hashlib.sha256(
            "".join([f"merge geometries method={merge_method}"] + [i.id for i in items]).encode()
        ).hexdigest()
        return {
            "merged": item_type.clone_from(first_item, id=merge_id, assets=[], geometry=merge_geom)
        }

    return callback
def test_op():
    """Check union and intersection of two nested boxes through the op tester."""
    small_box = shpg.box(0, 0, 1, 1)
    large_box = shpg.box(0, 0, 2, 2)
    items = [
        DataVibe(
            id=f"{i}",
            geometry=shpg.mapping(g),
            time_range=(datetime.now(), datetime.now()),
            assets=[],
        )
        for i, g in enumerate([small_box, large_box])
    ]
    op_tester = OpTester(CONFIG_PATH)

    # The small box lies inside the large one, so the union is the large box
    # and the intersection is the small box
    for method, expected_geom in (("union", large_box), ("intersection", small_box)):
        op_tester.update_parameters({"method": method})
        out = op_tester.run(items=items)  # type: ignore
        assert "merged" in out
        out_vibe = out["merged"]
        assert isinstance(out_vibe, DataVibe)
        assert shpg.shape(out_vibe.geometry).equals(expected_geom)
        # Everything but the geometry comes from the first item
        assert out_vibe.time_range == items[0].time_range
def callback_builder():
    """
    Build the callback of the merge_geometry_and_time_range op.

    The callback returns, under the key "merged", an assetless DataVibe that
    takes its geometry from `geometry` and its time range from `time_range`.
    The id is a SHA-256 digest derived from the ids of both inputs.
    """

    def callback(geometry: DataVibe, time_range: DataVibe) -> Dict[str, DataVibe]:
        id_seed = f"merge geometry and time range {geometry.id}{time_range.id}"
        merged_id = hashlib.sha256(id_seed.encode()).hexdigest()
        merged = DataVibe(
            id=merged_id,
            geometry=geometry.geometry,
            time_range=time_range.time_range,
            assets=[],
        )
        return {"merged": merged}

    return callback
def test_op():
    """Check that geometry and time range are taken from the respective inputs."""
    vibe1 = DataVibe(
        id="1",
        geometry=shpg.mapping(shpg.box(0, 0, 1, 1)),
        time_range=(datetime(2020, 1, 1), datetime(2020, 2, 2)),
        assets=[],
    )
    vibe2 = DataVibe(
        id="2",
        geometry=shpg.mapping(shpg.box(0, 0, 2, 2)),
        time_range=(datetime(2021, 1, 1), datetime(2021, 2, 2)),
        assets=[],
    )
    op_tester = OpTester(CONFIG_PATH)

    # Run once per role assignment: first vibe1 provides the geometry, then vibe2
    for geometry_source, time_source in ((vibe1, vibe2), (vibe2, vibe1)):
        out = op_tester.run(geometry=geometry_source, time_range=time_source)
        assert "merged" in out
        out_vibe = out["merged"]
        assert isinstance(out_vibe, DataVibe)
        assert out_vibe.geometry == geometry_source.geometry
        assert out_vibe.time_range == time_source.time_range
# Raster metadata fields that must be identical for every raster in the sequence
FIELDS = ("crs", "dtype", "count")
# Maps the `resolution` parameter to a function that reduces the list of
# (x, y) resolutions read from the rasters to a single pair. "equal" has no
# reducer because get_resolution handles it separately and returns None.
# NOTE(review): the reducers work on the values of rasterio's `res`; if those
# are pixel sizes, "lowest" (np.min) selects the finest grid and "highest"
# (np.max) the coarsest. Confirm the naming matches the intent.
RESOLUTION_METHODS = {
    "equal": None,
    "average": lambda resolutions: tuple(np.mean(resolutions, axis=0)),
    "lowest": lambda resolutions: tuple(np.min(resolutions, axis=0)),
    "highest": lambda resolutions: tuple(np.max(resolutions, axis=0)),
}
LOGGER = logging.getLogger(__name__)
def merge_rasters(
    raster_sequence: RasterSequence, output_dir: str, resampling: Resampling, resolution: str
) -> Dict[str, Raster]:
    """
    Merge all rasters of a sequence into a single GeoTIFF written to output_dir.

    Every raster must share the same CRS, dtype and band count. The output
    covers the bounds of the sequence geometry, reprojected from EPSG:4326 to
    the raster CRS, and its resolution is chosen by get_resolution according to
    `resolution`.

    Returns a dict with the merged Raster under the key "raster".

    Raises:
        ValueError: If the rasters differ in CRS, dtype or band count, or if
            the dtype is neither an integer nor a floating point type.
        FileNotFoundError: If the merged file is missing after the merge.
    """
    out_id = gen_guid()
    file_path = os.path.join(output_dir, f"{out_id}.tif")
    # All rasters should have the same CRS
    assets_meta: Dict[str, Set[Any]] = defaultdict(set)
    for r in raster_sequence.get_ordered_assets():
        with rasterio.open(r.url) as src:
            for field in FIELDS:
                assets_meta[field].add(src.meta[field])
    for field, field_set in assets_meta.items():
        if len(field_set) > 1:
            raise ValueError(
                f"Expected all rasters in RasterSequence to have the same '{field}', "
                f"found {field_set}"
            )
    crs = assets_meta["crs"].pop()
    dtype = assets_meta["dtype"].pop()

    # Reject unsupported dtypes before choosing the compression settings; the
    # exception has to be raised, building it alone has no effect
    is_integer = np.issubdtype(dtype, np.integer)
    if not (is_integer or np.issubdtype(dtype, np.floating)):
        raise ValueError(f"Expected raster with int or float subtype, found {dtype}")
    compression_kwargs = INT_COMPRESSION_KWARGS if is_integer else FLOAT_COMPRESSION_KWARGS

    # Bounds of the sequence geometry expressed in the CRS of the rasters
    bounds = tuple(
        gpd.GeoSeries(shpg.shape(raster_sequence.geometry), crs="epsg:4326")
        .to_crs(crs)
        .bounds.iloc[0]
    )

    merge(
        [i.url for i in raster_sequence.get_ordered_assets()],
        bounds=bounds,
        res=get_resolution(raster_sequence, resolution),
        resampling=resampling,
        dst_path=file_path,
        dst_kwds=compression_kwargs,
    )

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Merged raster not found in {file_path}.")

    asset = AssetVibe(reference=file_path, type=mimetypes.types_map[".tif"], id=out_id)
    product = Raster.clone_from(raster_sequence, id=gen_guid(), assets=[asset])
    return {"raster": product}
output_dir=self.tmp_dir.name, + resampling=self.resampling, + resolution=self.resolution, + ) + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/merge_rasters/merge_rasters.yaml b/ops/merge_rasters/merge_rasters.yaml new file mode 100644 index 00000000..4b95cdbf --- /dev/null +++ b/ops/merge_rasters/merge_rasters.yaml @@ -0,0 +1,27 @@ +# Merge rasters in a sequence to a single raster +# All rasters in the sequence should have the same CRS and dtype +name: merge_rasters +inputs: + raster_sequence: RasterSequence +output: + raster: Raster +parameters: + resampling: bilinear + resolution: equal +entrypoint: + file: merge_rasters.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - resampling + - resolution +description: + short_description: Merges rasters in a sequence to a single raster. + parameters: + resampling: + Resampling method used to reproject the rasters to a common CRS. + resolution: >- + Determines how the resolution of the output raster is defined. One of 'equal' (breaks if the + resolution of the sequence rasters are not the same), 'lowest' (uses the lowest resolution + among rasters), 'highest' (uses the highest resolution among rasters), or 'average' (averages + the resolution of all rasters in the sequence). 
def merge_rasters(
    filepaths: List[str],
    bounds: Tuple[float, float, float, float],
    resampling: Resampling,
    out_path: str,
    **kwargs: Any,
):
    """
    Merge the rasters in `filepaths` into a tiled GeoTIFF written to out_path.

    Every input is wrapped in a WarpedVRT built with `kwargs` before being
    merged within `bounds` using `resampling`. All opened datasets are closed
    before returning, also when an error occurs.

    Returns whatever rasterio's merge returns for the given dst_path.
    """
    src = []
    vrt = []
    try:
        # Open one at a time so that datasets opened before a failure are
        # still tracked and closed in the finally block
        for path in filepaths:
            src.append(rasterio.open(path))
        for dataset in src:
            vrt.append(WarpedVRT(dataset, **kwargs))
        # Build a new dict: FLOAT_COMPRESSION_KWARGS is a module-level object
        # imported from vibe_lib.raster and must not be modified here
        dst_kwds = {
            **FLOAT_COMPRESSION_KWARGS,
            "driver": "GTiff",
            "blockxsize": 512,
            "blockysize": 512,
        }
        return merge(
            vrt, bounds=bounds, resampling=resampling, dst_path=out_path, dst_kwds=dst_kwds
        )
    finally:
        # Close the VRTs before the datasets they wrap
        for i in vrt + src:
            i.close()  # type:ignore
class CallbackBuilder:
    """
    Builds the callback of the merge_sentinel1_orbits op.

    `resampling` is the name of a rasterio Resampling member. Merged rasters
    are written to a temporary directory that is removed when the builder is
    deleted.
    """

    def __init__(self, resampling: str):
        self.tmp_dir = TemporaryDirectory()
        self.resampling = Resampling[resampling]

    def __call__(self):
        def callback(
            raster_group: Sentinel1RasterOrbitGroup,
        ) -> Dict[str, Sentinel1Raster]:
            merged_product = process_orbit(raster_group, self.tmp_dir.name, self.resampling)
            return {"merged_product": merged_product}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
def merge_rasters(path_list: List[str], dst_dir: str) -> str:
    """
    Merge the rasters in path_list into one file inside dst_dir.

    The output file is named after the first entry of path_list. Returns the
    path of the merged file.
    """
    dst_path = os.path.join(dst_dir, uri_to_filename(path_list[0]))
    creation_options = {"zstd_level": 9, "predictor": 2}
    # Rasterio is merging by keeping the first pixel while GDAL was keeping the
    # last. There seems to be no advantage to either, but the new behavior is
    # different.
    merge(path_list, dst_path=dst_path, dst_kwds=creation_options)
    return dst_path
class CallbackBuilder:
    """
    Builds the callback of the find_soil_samples op.

    The raster is clipped to the user geometry, its values are clustered with
    a Gaussian mixture of `n_clusters` components, clusters are sieved with
    `sieve_size` and turned into polygons. One representative point per polygon
    is the sample location. Both the polygons and the points are written as
    zipped shapefiles.
    """

    def __init__(self, n_clusters: int, sieve_size: int):
        # Temporary directories holding the outputs, cleaned up in __del__
        self.temp_dir = []
        self.n_clusters = n_clusters
        self.random_state = 45
        self.sieve_size = sieve_size

    def find_minimum_samples(self, raster: Raster, user_input: DataVibe) -> DataVibe:
        """Clip the raster to the user geometry and compute the sample locations."""
        self.geometry_mask = GeoSeries([shape(user_input.geometry)], crs="EPSG:4326")
        # read input files
        with rasterio.open(raster.raster_asset.url, "r") as r_obj:
            clip_geom = self.geometry_mask.to_crs(r_obj.crs)[0]
            clipped, clipped_transform = mask(r_obj, [clip_geom], crop=True, nodata=0)
            # Kept on the instance because inference needs them later
            self.raster_crs = r_obj.crs
            self.tr = clipped_transform

        # Only the first band is used
        asset_vibes = self.get_samples(clipped[0])
        out_id = gen_hash_id("heatmap_nutrients", raster.geometry, raster.time_range)
        return DataVibe(out_id, raster.time_range, raster.geometry, asset_vibes)

    def get_samples(self, x: NDArray[Any]) -> List[AssetVibe]:
        """Cluster the values in x and write cluster boundaries and sample locations."""
        fitted = self.train_model(x)
        geo_clusters, geo_locations = self.inference(model=fitted, input=x)
        return [
            self.write_samples(geo_clusters, "geo_cluster_boundaries"),
            self.write_samples(geo_locations, "geo_sample_locations"),
        ]

    def train_model(
        self,
        input: NDArray[Any],
    ):
        """Fit a Gaussian mixture on the flattened values, with NaNs replaced."""
        samples = np.nan_to_num(input.reshape(-1, 1))
        mixture = GaussianMixture(
            n_components=self.n_clusters, covariance_type="full", random_state=self.random_state
        )
        mixture.fit(samples)
        return mixture

    def inference(
        self,
        model: GaussianMixture,
        input: NDArray[Any],
    ) -> Tuple[GeoDataFrame, GeoDataFrame]:
        """
        Predict a cluster per pixel and return the cluster polygons together
        with one representative point per polygon. Raises RuntimeError when no
        polygon is left after clipping to the user geometry.
        """
        # convert input to 2D array
        samples = np.nan_to_num(input.reshape(-1, 1))

        # predict clusters
        cluster_map = model.predict(samples).reshape(input.shape)

        # group small clusters
        cluster_map = sieve(cluster_map.astype(np.uint8), self.sieve_size)

        # converting clusters generated to a GeoDataFrame
        cluster_geoms = []
        for cluster_id in range(self.n_clusters):
            cluster_pixels = (cluster_map == cluster_id).astype(np.uint8)
            for geom, _ in shapes(cluster_pixels, mask=cluster_pixels, transform=self.tr):
                cluster_geoms.append(make_valid(shpg.shape(geom)))

        if len(cluster_geoms) == 0:
            raise RuntimeError("No samples found")

        # get lat lon of center of each polygon, the center will be inside the polygon
        gdf = GeoDataFrame(data=cluster_geoms, columns=["geometry"], crs=self.raster_crs)  # type: ignore
        gdf = cast(GeoDataFrame, gdf.to_crs("EPSG:4326"))
        gdf = cast(GeoDataFrame, clip(gdf, self.geometry_mask, keep_geom_type=True))

        if gdf is None or gdf.empty:
            raise RuntimeError("No samples found")

        return (gdf, gdf.geometry.representative_point())

    def write_samples(self, geo_df: GeoDataFrame, geo_type: str) -> AssetVibe:
        """Write geo_df as a shapefile and return a zip asset named after geo_type."""
        temp_d = TemporaryDirectory()
        shp_name = f"minimum_samples_location_{geo_df.shape[0]}.shp"
        geo_df.to_file(os.path.join(temp_d.name, shp_name))
        self.temp_dir.append(temp_d)

        # Create zip archive containing all output
        archive_path = create_flat_archive(temp_d.name, geo_type)
        return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid())

    def __call__(self):
        def find_minimum_samples_init(raster: Raster, user_input: DataVibe) -> Dict[str, DataVibe]:
            return {"locations": self.find_minimum_samples(raster, user_input)}

        return find_minimum_samples_init

    def __del__(self):
        for temp_d in self.temp_dir:
            temp_d.cleanup()
@pytest.fixture
def download_sentinel_cluster(
    vibe_client: FarmvibesAiClient, vibe_geometry_shapely: Union[MultiPolygon, Polygon]
) -> Raster:
    """Run the Sentinel-2 preprocessing workflow on the cluster and return its first raster.

    Raises:
        RuntimeError: If the run could not be created or did not finish as "done".
    """
    run = vibe_client.run(
        workflow="data_ingestion/sentinel2/preprocess_s2",
        name="sentinel2_example",
        geometry=vibe_geometry_shapely,
        time_range=FAKE_TIME_RANGE,
    )
    if run is None:
        # Nothing to poll; waiting on a missing run would never terminate
        raise RuntimeError("Download Raster request failed")

    # Poll until the run leaves the in-progress states, sleeping between
    # checks instead of spinning on the status
    while run.status == "running" or run.status == "pending":
        time.sleep(5)

    if run.status == "done":
        obj: Raster = run.output["raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")


@pytest.fixture
def download_index_cluster(
    vibe_client: FarmvibesAiClient, download_sentinel_cluster: Raster, index: str
) -> Raster:
    """Compute the spectral index ``index`` for the downloaded raster on the cluster.

    Raises:
        RuntimeError: If the run did not finish as "done".
    """
    parameters = {"index": index}

    run = vibe_client.run(
        workflow="data_processing/index/index",
        name="EVI_example",
        input_data=download_sentinel_cluster,
        parameters=parameters,
    )

    # Poll until the run leaves the in-progress states, sleeping between
    # checks instead of spinning on the status
    while run.status == "running" or run.status == "pending":
        time.sleep(5)

    if run.status == "done":
        obj: Raster = run.output["index_raster"][0]  # type: ignore
        return obj

    raise RuntimeError("Download Raster request failed")
def cochran_armitage_trend_test(contingency_table: NDArray[Any]) -> Tuple[float, float]:
    """Run a two-sided Cochran-Armitage trend test on a table of counts.

    Rows and columns are both scored with equally spaced weights
    ``0, 1, 2, ...`` in the order in which they appear in the table.

    Args:
        contingency_table: 2D array of counts.

    Returns:
        ``(p_value, z_score)``. A degenerate table (fewer than two
        observations, or zero variance because every observation falls in a
        single row or column) carries no evidence of a trend and yields
        ``(1.0, 0.0)`` instead of NaN.
    """
    contingency_table = np.array(contingency_table)

    row_sums = np.sum(contingency_table, axis=1)
    column_sums = np.sum(contingency_table, axis=0)
    total = np.sum(row_sums)

    # The variance below divides by ``total`` and ``total - 1``
    if total <= 1:
        return 1.0, 0.0

    row_weights = np.arange(contingency_table.shape[0])
    column_weights = np.arange(contingency_table.shape[1])

    # Expected value
    col_inner = np.inner(column_weights, column_sums)
    row_inner = np.inner(row_weights, row_sums)
    expected = col_inner * row_inner / total

    # Statistics
    statistic = np.inner(row_weights, np.inner(contingency_table, column_weights))

    # Theoretical background can be found here:
    # https://real-statistics.com/chi-square-and-f-distributions/cochran-armitage-test/
    # https://doi.org/10.1002/0471249688.ch5
    variance_numerator = np.inner(row_weights**2, row_sums) - row_inner**2 / total
    variance_numerator *= np.inner(column_weights**2, column_sums) - col_inner**2 / total
    variance = variance_numerator / (total - 1)

    # Zero (or rounding-induced non-positive / NaN) variance would turn the
    # z-score into NaN; report "no trend" explicitly instead.
    if not variance > 0:
        return 1.0, 0.0

    z_score = (statistic - expected) / np.sqrt(variance)
    p_value = 2 * norm.cdf(-np.abs(z_score))

    return float(p_value), float(z_score)
raise ValueError("Ordinal trend test requires at least pixel count from 2 rasters.") + + # Order the pixel counts by using the first date in time_range + pixel_count = sorted(pixel_count, key=lambda x: x.time_range[0]) + + time_ranges = [ + f"{dt.strftime(r.time_range[0], DATE_FORMAT)}-" + f"{dt.strftime(r.time_range[1], DATE_FORMAT)}" + for r in pixel_count + ] + + # Calculate the min and max dates for the rasters + min_date = min([r.time_range[0] for r in pixel_count]) + max_date = max([r.time_range[1] for r in pixel_count]) + + contingency_table = load_contingency_table(pixel_count) + p_value, z_score = cochran_armitage_trend_test(contingency_table.values) + + contingency_table.index.name = "category" + contingency_table.columns = time_ranges # type: ignore + + guid = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") + contingency_table.to_csv(filepath) + + ordinal_trend_result = OrdinalTrendTest( + gen_guid(), + time_range=(min_date, max_date), + geometry=pixel_count[0].geometry, + assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], + p_value=p_value, + z_score=z_score, + ) + + return {"ordinal_trend_result": ordinal_trend_result} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/ordinal_trend_test/ordinal_trend_test.yaml b/ops/ordinal_trend_test/ordinal_trend_test.yaml new file mode 100644 index 00000000..abfded46 --- /dev/null +++ b/ops/ordinal_trend_test/ordinal_trend_test.yaml @@ -0,0 +1,24 @@ +name: ordinal_trend_test +inputs: + pixel_count: List[RasterPixelCount] +output: + ordinal_trend_result: OrdinalTrendTest +parameters: +entrypoint: + file: ordinal_trend_test.py + callback_builder: CallbackBuilder +description: + short_description: Detects increase/decrease trends over a list of Rasters. + long_description: + Performs a cochran-armitage trend test over a list of rasters. The test + determines if there is an increasing/decreasing trend in the pixel levels + over the list of rasters. 
def fake_raster_pixel_count(
    tmp_dir: str, pixel_id: str, fake_stack_data: NDArray[Any]
) -> RasterPixelCount:
    """Write ``fake_stack_data`` as a pixel-count CSV and wrap it in a RasterPixelCount.

    The CSV is stored as ``<tmp_dir>/<pixel_id>.csv`` with integer formatting
    and the ``CSV_HEADER`` header line. The returned item covers the whole of
    2023 and uses a dummy geometry.
    """
    csv_path = os.path.join(tmp_dir, f"{pixel_id}.csv")
    np.savetxt(
        csv_path,
        fake_stack_data,
        delimiter=",",
        fmt="%d",
        comments="",  # keep the header line free of the default "# " prefix
        header=CSV_HEADER,
    )

    csv_asset = AssetVibe(reference=csv_path, type="text/csv", id="fake_asset_id")
    year_2023 = (datetime(2023, 1, 1), datetime(2023, 12, 31))
    return RasterPixelCount(
        id=pixel_id,
        time_range=year_2023,
        geometry=shpg.mapping(shpg.box(0, 0, 0, 0)),
        assets=[csv_asset],
    )
def callback(rasters1: List[Raster], rasters2: List[Raster]) -> Dict[str, List[Raster]]:
    """Pair every raster of ``rasters1`` with each intersecting raster of ``rasters2``.

    The two output lists are aligned: element ``i`` of ``paired_rasters1``
    intersects element ``i`` of ``paired_rasters2``. A raster appears once per
    intersection it takes part in.

    Raises:
        ValueError: If no pair of rasters intersects.
    """
    # Parse the second list's geometries once instead of once per raster in
    # the first list.
    geoms2 = [shpg.shape(r2.geometry) for r2 in rasters2]

    paired_rasters1: List[Raster] = []
    paired_rasters2: List[Raster] = []
    for r1 in rasters1:
        geom1 = shpg.shape(r1.geometry)
        for r2, geom2 in zip(rasters2, geoms2):
            if geom1.intersects(geom2):
                paired_rasters1.append(r1)
                paired_rasters2.append(r2)

    if not paired_rasters1:
        raise ValueError("No intersecting rasters could be paired")
    return {"paired_rasters1": paired_rasters1, "paired_rasters2": paired_rasters2}
class CallbackBuilder:
    """Build the op callback that prices a list of AirBus products.

    Products already covered by an image in the user's library (normalized
    intersection of at least ``iou_threshold``) cost nothing; the others are
    quoted through the AirBus API.
    """

    def __init__(self, api_key: str, projected_crs: bool, iou_threshold: float):
        self.api_key = api_key
        self.projected_crs = projected_crs
        self.iou_thr = iou_threshold
        self.tmp_dir = TemporaryDirectory()

    def __call__(self):
        def price_product(api: AirBusAPI, product: AirbusProduct) -> float:
            """Return the price of ``product``, or 0 when it is already owned."""
            geom = shpg.shape(product.geometry)
            owned = api.query_owned(geom, product.acquisition_id)
            # Only the best-overlapping owned image matters
            overlaps = [norm_intersection(geom, shpg.shape(o["geometry"])) for o in owned]
            if not overlaps or max(overlaps) < self.iou_thr:
                # We choose the envelope to avoid having images with a lot of nodata in the library
                quote = api.get_price([product.extra_info["id"]], geom.envelope)["price"]
                if quote["amountUnit"] != AMOUNT_UNIT:
                    raise ValueError(
                        f"Expected amount in {AMOUNT_UNIT}, got {quote['amountUnit']}"
                    )
                return quote["amount"]
            return 0  # We already have it so price is 0

        def price_products(
            airbus_products: List[AirbusProduct],
        ) -> Dict[str, AirbusPrice]:
            """Sum the prices of all products into a single AirbusPrice item."""
            api = AirBusAPI(self.api_key, self.projected_crs, list(Constellation))
            total_price = sum(price_product(api, p) for p in airbus_products)
            date = datetime.now()
            geom = unary_union([shpg.shape(p.geometry) for p in airbus_products])
            return {
                "products_price": AirbusPrice(
                    id=gen_guid(),
                    time_range=(date, date),
                    geometry=shpg.mapping(geom),
                    assets=[],
                    price=total_price,
                )
            }

        return price_products

    def __del__(self):
        # Release the temporary directory, as the other op builders do
        self.tmp_dir.cleanup()
def encode_str(id_dict: Dict[str, int], val: str) -> int:
    """Map a category name to its integer code.

    Args:
        id_dict: Lookup table from category name to code.
        val: Category name. It is looked up exactly as given (no stripping or
            case folding), so it must match a key of ``id_dict`` verbatim.

    Returns:
        0 for an empty or whitespace-only name, the code from ``id_dict`` for
        a known name, and 18 for any unknown name.
    """
    if not val.strip():
        return 0

    # 18 is the catch-all code (it is the "OTHER" entry of PROTEIN_FAMILY_ID)
    return id_dict.get(val, 18)
def read_protein(protein_df: pd.DataFrame):
    """Return the first three entries of the ``protein_list`` column as strings.

    Args:
        protein_df: Data frame with a ``protein_list`` column, looked up by
            the index labels 0, 1 and 2.

    Returns:
        A 3-tuple of strings. The first entry is mandatory; a missing second
        or third entry is replaced by the single-space placeholder ``" "``.

    Raises:
        KeyError: If the ``protein_list`` column or its first entry is missing.
    """
    # Indexing raises KeyError by itself when the column is absent, so no
    # separate validation is needed.
    protein_list = protein_df["protein_list"]

    def optional_sequence(label: int) -> str:
        # Entries beyond the first are optional
        try:
            return str(protein_list[label])
        except KeyError:
            return " "

    fasta_sequence0 = str(protein_list[0])
    return fasta_sequence0, optional_sequence(1), optional_sequence(2)
def parse_grib_file(grib_file: str, lat: float, lon: float, output_dir: str) -> AssetVibe:
    """Extracts the local data from a global forecast.

    Args:
        grib_file: the path to the grib file for the given time of interest
        lat: the latitude of the forecast [-90, 90]
        lon: the longitude of the forecast [-180, 180]
        output_dir: directory in which to save csv data for this forecast

    Returns:
        AssetVibe containing the forecast for the time and location specified
    """
    # GFS stores longitude in a range from 0-360
    # compute unsigned value from [-180,180] scale
    gfs_lon = (lon + 360) % 360

    keys = {"typeOfLevel": "surface"}
    # Every file except the "f000" one is additionally filtered to instantaneous fields
    if not grib_file.endswith("f000.grib"):
        keys["stepType"] = "instant"

    ds = xr.load_dataset(grib_file, engine="cfgrib", filter_by_keys=keys)
    forecast = ds.sel(latitude=lat, longitude=gfs_lon, method="nearest")

    # Name the output after the grib file's base name only. Joining
    # output_dir with a name derived from the full (typically absolute) path
    # would discard output_dir and write the CSV next to the input file.
    base_name = os.path.splitext(os.path.basename(grib_file))[0]
    data_file = "{file}_{lat}_{lon}.csv".format(file=base_name, lat=lat, lon=lon)

    file_path = os.path.join(output_dir, data_file)

    with open(file_path, "w") as forecast_file:
        forecast_file.write(forecast.to_pandas().to_csv())  # type: ignore

    return AssetVibe(reference=file_path, type=mimetypes.types_map[".csv"], id=gen_guid())
class CallbackBuilder:
    """Build the op callback that recodes raster values through a lookup table.

    Args:
        from_values: Values to replace.
        to_values: Replacement values, aligned with ``from_values``.

    Raises:
        ValueError: If the two lists differ in length.
    """

    def __init__(self, from_values: List[float], to_values: List[float]):
        # Created before validation so __del__ always finds the attribute
        self.tmp_dir = TemporaryDirectory()

        if len(from_values) != len(to_values):
            raise ValueError(
                f"'from_values' and 'to_values' must have the same length. "
                f"Got {len(from_values)} and {len(to_values)}, respectively."
            )

        self.recode_map = dict(zip(from_values, to_values))

    @staticmethod
    def _recode(values: np.ndarray, recode_map: Dict[float, float]) -> np.ndarray:
        """Return a copy of ``values`` with every key of ``recode_map`` replaced.

        Values that are not keys of the map are kept unchanged. The output
        dtype is the common dtype of the input and of the replacement values,
        so it does not depend on which pixel happens to come first (as it
        does with ``np.vectorize`` when ``otypes`` is not given).
        """
        values = np.asarray(values)
        if not recode_map:
            return values.copy()

        targets = np.asarray(list(recode_map.values()))
        recoded = values.astype(np.result_type(values.dtype, targets.dtype), copy=True)
        for source, target in recode_map.items():
            # Match against the untouched input so replacements never chain
            recoded[values == source] = target
        return recoded

    def __call__(self):
        def callback(raster: Raster) -> Dict[str, Raster]:
            data_ar = load_raster(raster)

            # Return the same pixel value if it is not in the recode map
            transformed_ar = data_ar.copy(
                data=self._recode(np.asarray(data_ar), self.recode_map)
            )
            transformed_raster = save_raster_from_ref(transformed_ar, self.tmp_dir.name, raster)

            return {"recoded_raster": transformed_raster}

        return callback

    def __del__(self):
        self.tmp_dir.cleanup()
@pytest.fixture
def fake_raster(tmp_dir: str):
    """Create a 3-band, 128x128 float32 raster with random values in {0, 1, 2, 3}.

    The raster is georeferenced in EPSG:4326 over the unit square, saved under
    ``tmp_dir`` and returned as a Raster whose bands are named B1, B2 and B3.
    """
    band_names = ["B1", "B2", "B3"]
    nbands = 3
    width = 128
    height = 128

    pixels = np.random.randint(0, 4, size=(nbands, height, width)).astype(np.float32)
    coords = {
        "bands": np.arange(nbands),
        "x": np.linspace(0, 1, width),
        "y": np.linspace(0, 1, height),
    }
    fake_da = xr.DataArray(pixels, coords=coords, dims=["bands", "y", "x"])
    fake_da.rio.write_crs("epsg:4326", inplace=True)

    asset = save_raster_to_asset(fake_da, tmp_dir)
    footprint = shpg.mapping(shpg.box(*fake_da.rio.bounds()))
    return Raster(
        id="fake_id",
        time_range=(datetime(2023, 1, 1), datetime(2023, 1, 1)),
        geometry=footprint,
        assets=[asset],
        bands={name: position for position, name in enumerate(band_names)},
    )
raster values is always 4 + assert np.all(raster_data - fake_raster_data == 4) + + +def test_recode_not_mapped_values(fake_raster: Raster): + op = OpTester(CONFIG_PATH) + + parameters = { + "from_values": [10, 11, 12, 13], + "to_values": [-1, -2, -3, -4], + } + + op.update_parameters(parameters) + output = op.run(raster=fake_raster) + assert output + + raster = cast(Raster, output["recoded_raster"]) + raster_data = load_raster(raster) + fake_raster_data = load_raster(fake_raster) + + # Assert that the recoded raster has the same shape as the original + assert raster_data.shape == fake_raster_data.shape + + # Assert fake_raster_data and raster_data are the same + assert np.all(raster_data == fake_raster_data) + + # Assert raster_data has no negative values + assert np.all(raster_data >= 0) + + +def test_recode_raster_different_lengths(fake_raster: Raster): + op = OpTester(CONFIG_PATH) + parameters = { + "from_values": [0, 1, 2], + "to_values": [4, 5, 6, 7], + } + + op.update_parameters(parameters) + with pytest.raises(ValueError): + op.run(raster=fake_raster) diff --git a/ops/remove_clouds/remove_clouds.py b/ops/remove_clouds/remove_clouds.py new file mode 100644 index 00000000..88aa6a08 --- /dev/null +++ b/ops/remove_clouds/remove_clouds.py @@ -0,0 +1,279 @@ +# pyright: reportUnknownMemberType=false +import logging +import os +from abc import abstractmethod +from datetime import datetime, timedelta +from tempfile import TemporaryDirectory +from typing import Dict, Optional, Union, cast + +import geopandas as gpd +import numpy as np +import onnxruntime as ort +import torch +import torch.nn as nn +from shapely import geometry as shpg +from torch.utils.data import DataLoader + +from vibe_core.data import AssetVibe, gen_guid +from vibe_core.data.sentinel import ( + S2ProcessingLevel, + Sentinel1RasterTileSequence, + Sentinel2CloudMaskTileSequence, + Sentinel2RasterTileSequence, + SpaceEyeRasterSequence, +) +from vibe_lib.raster import INT_COMPRESSION_KWARGS, 
compress_raster, write_window_to_file +from vibe_lib.spaceeye.dataset import Dims, SpaceEyeReader +from vibe_lib.spaceeye.illumination import add_illuminance +from vibe_lib.spaceeye.interpolation import DampedInterpolation +from vibe_lib.spaceeye.utils import QUANTIFICATION_VALUE, SPACEEYE_TO_SPYNDEX_BAND_NAMES + +S1_NUM_BANDS = 2 +S2_NUM_BANDS = 10 +L1C_BAND_INDICES = [1, 2, 3, 4, 5, 6, 7, 8, 11, 12] +L2A_BAND_INDICES = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11] +FILENAME_TEMPLATE = "preds_{}.tif" + +LOGGER = logging.getLogger(__name__) + + +def get_filename(date: datetime) -> str: + return FILENAME_TEMPLATE.format(date.strftime("%Y%m%d")) + + +def remove_clouds( + model: Union[ort.InferenceSession, nn.Module], + dataset: SpaceEyeReader, + out_dir: str, + num_workers: int, +) -> SpaceEyeRasterSequence: + # TODO: Add meta to write_info dict + meta = { + "driver": "GTiff", + "height": dataset.height, + "width": dataset.width, + "count": S2_NUM_BANDS, + "crs": dataset.crs, + "dtype": "uint16", + "transform": dataset.transform, + "nodata": 0, + } + # Use batch size 1 + dataloader = DataLoader(dataset, collate_fn=lambda x: x, num_workers=num_workers) + total_chips = len(dataloader) + start_datetime = dataset.time_range[0] + for chip_idx, batch in enumerate(dataloader): + chip_data, write_info = batch[0] + t1, t2 = ( + (start_datetime + timedelta(days=t)).strftime("%Y-%m-%d") + for t in write_info["write_times"] + ) + write_window = write_info["write_window"] + (r1, r2), (c1, c2) = write_window.toranges() + LOGGER.info( + f"Running model for {t1}:{t2}, extent {r1}:{r2}, {c1}:{c2} " + f"({chip_idx + 1}/{total_chips})" + ) + inputs = {k: v[None] for k, v in chip_data.items() if k != "illuminance"} + with torch.inference_mode(): + if isinstance(model, nn.Module): + inputs = {k: torch.from_numpy(v) for k, v in inputs.items()} + s2 = cast(nn.Module, model)(inputs).numpy() + else: + s2 = cast(ort.InferenceSession, model).run(None, inputs)[0] + s2 = s2[0, :] + # Put illumination back + 
s2 = (add_illuminance(s2, chip_data["illuminance"]) * QUANTIFICATION_VALUE).astype( + np.uint16 + ) + chip_times, chip_rows, chip_cols = write_info["chip_slices"] + for write_t, chip_t in zip(range(*write_info["write_times"]), range(*chip_times)): + date = start_datetime + timedelta(days=write_t) + filename = get_filename(date) + filepath = os.path.join(out_dir, filename) + write_window_to_file( + s2[:, chip_t, slice(*chip_rows), slice(*chip_cols)], + None, + write_window, + filepath, + meta, + ) + + # Create a SpaceEyeRasterSequence with the sequence metadata + ref_sequence = dataset.s2_items + geom = shpg.mapping(gpd.GeoSeries(dataset.roi, crs=dataset.crs).to_crs("epsg:4326").iloc[0]) + spaceeye_sequence = SpaceEyeRasterSequence.clone_from( + ref_sequence, + assets=[], + id=gen_guid(), + geometry=geom, + time_range=dataset.time_range, + bands={name: idx for idx, name in enumerate(SPACEEYE_TO_SPYNDEX_BAND_NAMES.values())}, + ) + + geom = shpg.shape(geom) + + # Add each raster asset to the sequence + for time_idx in range(dataset.time_length): + date = start_datetime + timedelta(days=time_idx) + filename = get_filename(date) + filepath = os.path.join(out_dir, filename) + # Skip file if no predictions were made (not enough data) + if not os.path.exists(filepath): + continue + guid = gen_guid() + out_path = os.path.join(out_dir, f"{guid}.tif") + LOGGER.info(f"Compressing raster for {date.strftime('%Y-%m-%d')}") + compress_raster(filepath, out_path, **INT_COMPRESSION_KWARGS) + asset = AssetVibe(reference=out_path, type="image/tiff", id=guid) + spaceeye_sequence.add_asset(asset, (date, date), geom) + + return spaceeye_sequence + + +class CallbackBuilder: + def __init__( + self, + duration: int, + window_size: int, + spatial_overlap: float, + min_clear_ratio: float, + normalize_illuminance: bool, + num_workers: int, + ): + self.duration = duration + self.window_size = window_size + self.spatial_overlap = spatial_overlap + self.min_clear_ratio = min_clear_ratio + 
self.normalize_illuminance = normalize_illuminance + self.num_workers = num_workers + self.tmp_dir = TemporaryDirectory() + + def get_dataset( + self, + s1_products: Optional[Sentinel1RasterTileSequence], + s2_products: Sentinel2RasterTileSequence, + cloud_masks: Sentinel2CloudMaskTileSequence, + ) -> SpaceEyeReader: + s2_bands = ( + L1C_BAND_INDICES + if s2_products.processing_level == S2ProcessingLevel.L1C + else L2A_BAND_INDICES + ) + sequence_geom = shpg.shape(s2_products.geometry) + sequence_time_range = s2_products.time_range + dataset = SpaceEyeReader( + s1_items=s1_products, + s2_items=s2_products, + cloud_masks=cloud_masks, + time_range=sequence_time_range, + geometry=sequence_geom, + chip_size=Dims(width=self.window_size, height=self.window_size, time=self.duration), + overlap=(self.spatial_overlap, self.spatial_overlap, 0), + s2_bands=s2_bands, + min_clear_ratio=self.min_clear_ratio, + normalize_illuminance=self.normalize_illuminance, + ) + return dataset + + @abstractmethod + def get_model(self) -> Union[ort.InferenceSession, nn.Module]: + raise NotImplementedError + + def __call__(self): + def callback( + s2_products: Sentinel2RasterTileSequence, + cloud_masks: Sentinel2CloudMaskTileSequence, + s1_products: Optional[Sentinel1RasterTileSequence] = None, + ) -> Dict[str, SpaceEyeRasterSequence]: + if not s2_products.assets or (s1_products is not None and not s1_products.assets): + s1_str = ( + "" if s1_products is None else f"Sentinel-1: {len(s1_products.assets)} assets" + ) + LOGGER.warning( + "Received empty input sequence, output will be empty sequence. 
" + f"Sentinel-2: {len(s2_products.assets)} assets, {s1_str}" + ) + spaceeye_sequence = SpaceEyeRasterSequence.clone_from( + s2_products, + assets=[], + id=gen_guid(), + bands={ + name: idx + for idx, name in enumerate(SPACEEYE_TO_SPYNDEX_BAND_NAMES.values()) + }, + ) + return {"spaceeye_sequence": spaceeye_sequence} + model = self.get_model() + dataset = self.get_dataset(s1_products, s2_products, cloud_masks) + spaceeye_sequence = remove_clouds(model, dataset, self.tmp_dir.name, self.num_workers) + + return {"spaceeye_sequence": spaceeye_sequence} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() + + +class NNCallbackBuilder(CallbackBuilder): + def __init__( + self, + model_path: str, + duration: int, + window_size: int, + spatial_overlap: float, + min_clear_ratio: float, + normalize_illuminance: bool, + num_workers: int, + ): + super().__init__( + duration, + window_size, + spatial_overlap, + min_clear_ratio, + normalize_illuminance, + num_workers, + ) + self.model_path = model_path + + def get_model(self) -> ort.InferenceSession: + return ort.InferenceSession(self.model_path) + + +class InterpolationCallbackBuilder(CallbackBuilder): + def __init__( + self, + duration: int, + window_size: int, + spatial_overlap: float, + min_clear_ratio: float, + normalize_illuminance: bool, + num_workers: int, + damping_factor: float, + tolerance: float, + max_iterations: int, + check_interval: int, + ): + super().__init__( + duration, + window_size, + spatial_overlap, + min_clear_ratio, + normalize_illuminance, + num_workers, + ) + self.damping_factor = damping_factor + self.tol = tolerance + self.max_iter = max_iterations + self.check_interval = check_interval + + def get_model(self): + return DampedInterpolation( + S2_NUM_BANDS, + self.duration, + damping_factor=self.damping_factor, + tol=self.tol, + max_iter=self.max_iter, + check_interval=self.check_interval, + ) diff --git a/ops/remove_clouds/remove_clouds.yaml b/ops/remove_clouds/remove_clouds.yaml new 
file mode 100644 index 00000000..2e0bf717 --- /dev/null +++ b/ops/remove_clouds/remove_clouds.yaml @@ -0,0 +1,28 @@ +name: remove_clouds +inputs: + s1_products: Sentinel1RasterTileSequence + s2_products: Sentinel2RasterTileSequence + cloud_masks: Sentinel2CloudMaskTileSequence +output: + spaceeye_sequence: SpaceEyeRasterSequence +parameters: + model_path: /opt/terravibes/ops/resources/spaceeye_models/spaceeye.onnx + duration: 48 + window_size: 448 + spatial_overlap: 0.5 + min_clear_ratio: 0.1 + normalize_illuminance: True + num_workers: 0 +entrypoint: + file: remove_clouds.py + callback_builder: NNCallbackBuilder +dependencies: + parameters: + - duration + - window_size + - spatial_overlap + - min_clear_ratio + - normalize_illuminance +description: + short_description: + Runs SpaceEye to remove clouds in input rasters. \ No newline at end of file diff --git a/ops/remove_clouds/remove_clouds_interpolation.yaml b/ops/remove_clouds/remove_clouds_interpolation.yaml new file mode 100644 index 00000000..4acbf60b --- /dev/null +++ b/ops/remove_clouds/remove_clouds_interpolation.yaml @@ -0,0 +1,34 @@ +name: remove_clouds_interpolation +inputs: + s2_products: Sentinel2RasterTileSequence + cloud_masks: Sentinel2CloudMaskTileSequence +output: + spaceeye_sequence: SpaceEyeRasterSequence +parameters: + duration: 48 + window_size: 448 + spatial_overlap: 0.5 + min_clear_ratio: 0.1 + normalize_illuminance: True + num_workers: 0 + damping_factor: 0.1 + tolerance: .001 + max_iterations: 200 + check_interval: 5 +entrypoint: + file: remove_clouds.py + callback_builder: InterpolationCallbackBuilder +dependencies: + parameters: + - duration + - window_size + - spatial_overlap + - min_clear_ratio + - normalize_illuminance + - damping_factor + - tolerance + - max_iterations + - check_interval +description: + short_description: + Runs the interpolation version of SpaceEye to remove clouds in input rasters. 
\ No newline at end of file diff --git a/ops/remove_clouds/test_remove_clouds.py b/ops/remove_clouds/test_remove_clouds.py new file mode 100644 index 00000000..13e67745 --- /dev/null +++ b/ops/remove_clouds/test_remove_clouds.py @@ -0,0 +1,48 @@ +import os +from datetime import datetime, timezone +from typing import Any, Dict + +from shapely import geometry as shpg + +from vibe_core.data.sentinel import ( + Sentinel1RasterTileSequence, + Sentinel2CloudMaskTileSequence, + Sentinel2RasterTileSequence, +) +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH_NN = os.path.join(os.path.dirname(os.path.abspath(__file__)), "remove_clouds.yaml") + +CONFIG_PATH_INTERP = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "remove_clouds_interpolation.yaml" +) + + +def test_remove_clouds_empty_sequence(): + polygon: Dict[str, Any] = shpg.mapping(shpg.box(0, 0, 1, 1)) # type: ignore + start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) + s1 = Sentinel1RasterTileSequence( + id="s1", + time_range=(start_date, end_date), + geometry=polygon, + assets=[], + product_name="", + orbit_number=0, + relative_orbit_number=0, + orbit_direction="", + platform="", + extra_info={}, + sensor_mode="", + polarisation_mode="", + bands={}, + tile_id="", + write_time_range=(start_date, end_date), + ) + s2 = Sentinel2RasterTileSequence.clone_from(s1, id="s2", assets=[], processing_level="") + cloud = Sentinel2CloudMaskTileSequence.clone_from(s2, id="cloud", assets=[], categories=[]) + + nn_out = OpTester(CONFIG_PATH_NN).run(s1_products=s1, s2_products=s2, cloud_masks=cloud) + assert not nn_out["spaceeye_sequence"].assets # type: ignore + interp_out = OpTester(CONFIG_PATH_INTERP).run(s2_products=s2, cloud_masks=cloud) + assert not interp_out["spaceeye_sequence"].assets # type: ignore diff --git a/ops/segment_anything/automatic_segmentation.yaml b/ops/segment_anything/automatic_segmentation.yaml 
new file mode 100644 index 00000000..567706bb --- /dev/null +++ b/ops/segment_anything/automatic_segmentation.yaml @@ -0,0 +1,56 @@ +name: automatic_segmentation +inputs: + input_raster: Raster +output: + segmented_chips: List[SamMaskRaster] +parameters: + model_type: vit_b + band_names: null + band_scaling: null + band_offset: null + spatial_overlap: 0.0 + points_per_side: 16 + n_crop_layers: 0 + crop_overlap_ratio: 0.0 + crop_n_points_downscale_factor: 1 + pred_iou_thresh: 0.88 + stability_score_thresh: 0.95 + stability_score_offset: 1.0 + points_per_batch: 16 + num_workers: 0 + in_memory: True +entrypoint: + file: sam_inference.py + callback_builder: AutomaticSegmentationCallbackBuilder +dependencies: + parameters: + - model_type + - band_names + - band_scaling + - band_offset + - spatial_overlap + - points_per_side + - n_crop_layers + - crop_overlap_ratio + - crop_n_points_downscale_factor + - pred_iou_thresh + - stability_score_thresh + - stability_score_offset +description: + short_description: Runs a SAM automatic segmentation inference over the input raster, generating masks for each chip. + parameters: + model_type: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. + band_names: Name of raster bands that should be selected to compose the 3-channel images expected by SAM. If not provided, will try to use ["R", "G", "B"]. If only a single band name is provided, will replicate it through all three channels. + band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0] or [0.0, 255.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. + band_offset: A list of floats to offset each band by. 
If not provided, will default to the raster offset value. If a list with a single value is provided, will use it for all three bands. + spatial_overlap: Percentage of spatial overlap between chips in the range of [0.0, 1.0). + points_per_side: The number of points to be sampled along one side of the chip to be used as prompts. The total number of points is points_per_side**2. + n_crop_layers: If >0, mask prediction will be run again on crops of the image. Sets the number of layers to run, where each layer has 2**i_layer number of image crops. + crop_overlap_ratio: Sets the degree to which crops overlap. In the first crop layer, crops will overlap by this fraction of the chip length. Later layers with more crops scale down this overlap. + crop_n_points_downscale_factor: The number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n. + pred_iou_thresh: A filtering threshold in (0, 1) over the model's predicted mask quality/score. + stability_score_thresh: A filtering threshold in (0, 1], using the stability of the mask under changes to the cutoff used to binarize the model's mask predictions. + stability_score_offset: The amount to shift the cutoff when calculating the stability score. + points_per_batch: Number of points to process in a single batch. + num_workers: Number of workers to use for parallel processing. + in_memory: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models.
diff --git a/ops/segment_anything/prompt_segmentation.yaml b/ops/segment_anything/prompt_segmentation.yaml new file mode 100644 index 00000000..a30111bc --- /dev/null +++ b/ops/segment_anything/prompt_segmentation.yaml @@ -0,0 +1,36 @@ +name: prompt_segmentation +inputs: + input_raster: Raster + input_prompts: GeometryCollection +output: + segmentation_mask: CategoricalRaster +parameters: + model_type: vit_b + band_names: null + band_scaling: null + band_offset: null + spatial_overlap: 0.0 + points_per_batch: 64 + num_workers: 0 + in_memory: True +entrypoint: + file: sam_inference.py + callback_builder: PromptCallbackBuilder +dependencies: + parameters: + - model_type + - band_names + - band_scaling + - band_offset + - spatial_overlap +description: + short_description: Runs SAM over the input raster with points and bounding boxes as prompts. + parameters: + model_type: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. + band_names: Name of raster bands that should be selected to compose the 3-channel images expected by SAM. If not provided, will try to use ["R", "G", "B"]. If only a single band name is provided, will replicate it through all three channels. + band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0] or [0.0, 255.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. + band_offset: A list of floats to offset each band by. If not provided, will default to the raster offset value. If a list with a single value is provided, will use it for all three bands. + spatial_overlap: Percentage of spatial overlap between chips in the range of [0.0, 1.0). 
+ points_per_batch: Number of points to process in a single batch. + num_workers: Number of workers to use for parallel processing. + in_memory: Whether to load the whole raster in memory when running predictions. Uses more memory (~4GB/worker) but speeds up inference for fast models. diff --git a/ops/segment_anything/sam_inference.py b/ops/segment_anything/sam_inference.py new file mode 100644 index 00000000..5e833062 --- /dev/null +++ b/ops/segment_anything/sam_inference.py @@ -0,0 +1,561 @@ +import logging +import os +from tempfile import TemporaryDirectory +from typing import Any, Callable, Dict, List, Optional, Tuple, cast + +import numpy as np +import onnxruntime as ort +import torch +from numpy.typing import NDArray +from shapely import geometry as shpg +from shapely.geometry.base import BaseGeometry +from torch.utils.data import DataLoader +from torchvision.transforms.functional import resize + +from vibe_core.data import ( + AssetVibe, + BBox, + CategoricalRaster, + ChipWindow, + GeometryCollection, + Raster, + SamMaskRaster, + gen_guid, +) +from vibe_lib.raster import INT_COMPRESSION_KWARGS, write_window_to_file +from vibe_lib.segment_anything import ( + BACKGROUND_VALUE, + MASK_LOGIT_THRESHOLD, + SAM_CHIP_SIZE, + Prompt, + batch_prompt_encoder_preprocess, + build_chip_preprocessing_operation, + build_point_grid, + calculate_stability_score, + extract_img_embeddings_from_chip, + generate_crop_boxes, + get_mask_within_bbox, + get_normalized_prompts_within_chip, + mask_encoder_preprocess, + mask_to_bbox, + preprocess_geometry_collection, + prompt_encoder_preprocess, + translate_bbox, + uncrop_masks, +) +from vibe_lib.spaceeye.chip import ( + ChipDataset, + ChipDataType, + Dims, + InMemoryReader, + Window, + get_loader, + write_prediction_to_file, +) + +BASE_MODEL_PATH = "/mnt/onnx_resources/{model_type}_{model_part}.onnx" +SAM_MODEL_TYPES = ["vit_h", "vit_l", "vit_b"] + + +LOGGER = logging.getLogger(__name__) + + +class CallbackBuilder: + def __init__( + 
self, + model_type: str, + spatial_overlap: float, + num_workers: int, + in_memory: bool, + band_names: Optional[List[str]], + band_scaling: Optional[List[float]], + band_offset: Optional[List[float]], + ): + self.model_type = model_type + self.spatial_overlap = spatial_overlap + self.num_workers = num_workers + self.in_memory = in_memory + self.tmp_dir = TemporaryDirectory() + self.window_size = SAM_CHIP_SIZE + self.band_names = band_names + self.band_scaling = band_scaling + self.band_offset = band_offset + + def get_model(self) -> Tuple[ort.InferenceSession, ort.InferenceSession]: + if self.model_type not in SAM_MODEL_TYPES: + raise ValueError( + f"Unknown model type: '{self.model_type}'. Expected one of {SAM_MODEL_TYPES}" + ) + + encoder_path = BASE_MODEL_PATH.format(model_type=self.model_type, model_part="encoder") + decoder_path = BASE_MODEL_PATH.format(model_type=self.model_type, model_part="decoder") + + if not os.path.exists(encoder_path) or not os.path.exists(decoder_path): + raise ValueError( + f"Model files not found for model type: '{self.model_type}'. " + f"Refer to the troubleshooting section of FarmVibes.AI documentation " + f"for instructions on how to import the model files to the cluster." 
+ ) + + encoder = ort.InferenceSession(encoder_path) + LOGGER.info(f"Loaded encoder model from {encoder_path}") + decoder = ort.InferenceSession(decoder_path) + LOGGER.info(f"Loaded decoder model from {decoder_path}") + return encoder, decoder + + def get_chip_dataloader( + self, + raster: Raster, + geometry: BaseGeometry, + ) -> DataLoader[ChipDataType]: + chip_size = self.window_size + step_size = int(chip_size * (1 - self.spatial_overlap)) + dataset = ChipDataset( + [raster], + chip_size=Dims(chip_size, chip_size, 1), + step_size=Dims(step_size, step_size, 1), + nodata=BACKGROUND_VALUE, + geometry_or_chunk=geometry, + reader=InMemoryReader(downsampling=1) if self.in_memory else None, + ) + + dataloader = get_loader( + dataset, batch_size=1, num_workers=self.num_workers if not self.in_memory else 0 + ) + + return dataloader + + def __del__(self): + self.tmp_dir.cleanup() + + +class PromptCallbackBuilder(CallbackBuilder): + img_preprocessing_operation: Callable[[NDArray[Any]], NDArray[Any]] + + def __init__( + self, + model_type: str, + spatial_overlap: float, + points_per_batch: int, + num_workers: int, + in_memory: bool, + band_names: Optional[List[str]], + band_scaling: Optional[List[float]], + band_offset: Optional[List[float]], + ): + super().__init__( + model_type, + spatial_overlap, + num_workers, + in_memory, + band_names, + band_scaling, + band_offset, + ) + self.points_per_batch = points_per_batch + + def get_mask_for_prompt_group( + self, + prompt_group: List[Prompt], + chip_data: NDArray[Any], + decoder_session: ort.InferenceSession, + img_embedding: NDArray[Any], + ) -> NDArray[Any]: + prompt_group_mask = np.zeros((1, 1, *chip_data.shape[-2:]), dtype=bool) + for i in range(0, len(prompt_group), self.points_per_batch): + prompt_batch, prompt_label = prompt_encoder_preprocess( + prompt_group[i : i + self.points_per_batch] + ) + mask_prompt, has_mask_prompt = mask_encoder_preprocess() + + ort_inputs = { + "image_embeddings": img_embedding, + 
"point_coords": prompt_batch, + "point_labels": prompt_label, + "mask_input": mask_prompt, + "has_mask_input": has_mask_prompt, + "orig_im_size": np.array([self.window_size, self.window_size], dtype=np.float32), + } + + predicted_mask, _, _ = decoder_session.run(None, ort_inputs) + predicted_mask = predicted_mask > MASK_LOGIT_THRESHOLD + prompt_group_mask = np.logical_or(prompt_group_mask, predicted_mask) + + # Only include in the mask, pixels within the prompted bounding box + prompt_group_mask = get_mask_within_bbox(prompt_group_mask, prompt_group) + + return prompt_group_mask + + def generate_masks_from_points( + self, + dataloader: DataLoader[ChipDataType], + encoder_session: ort.InferenceSession, + decoder_session: ort.InferenceSession, + input_prompts: Dict[int, List[Prompt]], + ) -> List[str]: + filepaths: List[str] = [] + dataset = cast(ChipDataset, dataloader.dataset) + get_filename = dataset.get_filename + for batch_idx, batch in enumerate(dataloader): + chip_data, chip_mask, write_info_list = batch + output_chip_mask = np.zeros((1, len(input_prompts), *chip_data.shape[-2:]), dtype=bool) + + prompts_in_chip = get_normalized_prompts_within_chip( + input_prompts, dataset.read_windows[batch_idx][0], dataset.offset + ) + + if prompts_in_chip: + LOGGER.info(f"Running model for batch ({batch_idx + 1}/{len(dataloader)})") + + img_embedding = extract_img_embeddings_from_chip( + chip_data, self.img_preprocessing_operation, encoder_session + ) + + for prompt_id, prompt_group in prompts_in_chip.items(): + prompt_group_mask = self.get_mask_for_prompt_group( + prompt_group, chip_data, decoder_session, img_embedding + ) + output_chip_mask[0, prompt_id] = np.logical_or( + output_chip_mask[0, prompt_id], prompt_group_mask[0, 0] + ) + + else: + LOGGER.info( + "Skipping batch with no prompt intersection " + f"({batch_idx + 1}/{len(dataloader)})" + ) + + write_prediction_to_file( + output_chip_mask.astype(np.uint8), + chip_mask, + write_info_list, + self.tmp_dir.name, + 
filepaths, + get_filename, + ) + + return filepaths + + def __call__(self): + def callback( + input_raster: Raster, + input_prompts: GeometryCollection, + ) -> Dict[str, CategoricalRaster]: + geometry = shpg.shape(input_raster.geometry) + dataloader = self.get_chip_dataloader(input_raster, geometry) + + processed_prompts, prompt_id_map = preprocess_geometry_collection( + input_prompts, cast(ChipDataset, dataloader.dataset), geometry + ) + + self.img_preprocessing_operation = build_chip_preprocessing_operation( + input_raster, self.band_names, self.band_scaling, self.band_offset + ) + + encoder_session, decoder_session = self.get_model() + + mask_filepaths = self.generate_masks_from_points( + dataloader, + encoder_session, + decoder_session, + processed_prompts, + ) + + asset = AssetVibe(reference=mask_filepaths[0], type="image/tiff", id=gen_guid()) + segmentation_mask = CategoricalRaster.clone_from( + input_raster, + id=gen_guid(), + assets=[asset], + bands={ + f"mask_prompt_{prompt_id_map[prompt_id]}": prompt_id + for prompt_id in processed_prompts.keys() + }, + categories=["background", "foreground"], + ) + + return {"segmentation_mask": segmentation_mask} + + return callback + + +class AutomaticSegmentationCallbackBuilder(PromptCallbackBuilder): + def __init__( + self, + model_type: str, + spatial_overlap: float, + points_per_side: int, + n_crop_layers: int, + crop_overlap_ratio: float, + crop_n_points_downscale_factor: int, + pred_iou_thresh: float, + stability_score_thresh: float, + stability_score_offset: float, + points_per_batch: int, + num_workers: int, + in_memory: bool, + band_names: Optional[List[str]], + band_scaling: Optional[List[float]], + band_offset: Optional[List[float]], + ): + super().__init__( + model_type, + spatial_overlap, + points_per_batch, + num_workers, + in_memory, + band_names, + band_scaling, + band_offset, + ) + self.points_per_side = points_per_side + self.n_crop_layers = n_crop_layers + self.crop_overlap_ratio = crop_overlap_ratio 
+ self.crop_n_points_downscale_factor = crop_n_points_downscale_factor + self.pred_iou_thresh = pred_iou_thresh + self.stability_score_thresh = stability_score_thresh + self.stability_score_offset = stability_score_offset + self.validate_parameters() + + def validate_parameters(self): + if not isinstance(self.points_per_side, int) or self.points_per_side < 1: + raise ValueError( + f"'points_per_side' must be a positive integer. Got {self.points_per_side}." + ) + if not isinstance(self.n_crop_layers, int) or self.n_crop_layers < 0: + raise ValueError( + f"'n_crop_layers' must be a non-negative integer. Got {self.n_crop_layers}." + ) + if self.crop_overlap_ratio < 0 or self.crop_overlap_ratio >= 1: + raise ValueError( + "'crop_overlap_ratio' must be a float in the range [0, 1). " + f"Got {self.crop_overlap_ratio}." + ) + if ( + not isinstance(self.crop_n_points_downscale_factor, int) + or self.crop_n_points_downscale_factor < 1 + ): + raise ValueError( + "'crop_n_points_downscale_factor' must be a positive integer. " + f"Got {self.crop_n_points_downscale_factor}." + ) + if self.pred_iou_thresh <= 0 or self.pred_iou_thresh >= 1: + raise ValueError( + "'pred_iou_thresh' must be a float in the range (0, 1). " + f"Got {self.pred_iou_thresh}." + ) + if self.stability_score_thresh <= 0 or self.stability_score_thresh > 1: + raise ValueError( + "'stability_score_thresh' must be a float in the range (0, 1]. " + f"Got {self.stability_score_thresh}." 
+        )
+
+    def point_grid_inference(
+        self,
+        prompts: List[Prompt],
+        img_embedding: NDArray[Any],
+        decoder_session: ort.InferenceSession,
+    ) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]:
+        """Decode a grid of point prompts into filtered, binarized masks.
+
+        Prompts are sent to the decoder in batches of ``self.points_per_batch``. A mask is
+        discarded when its predicted score is not above ``self.pred_iou_thresh`` or when its
+        stability score is not above ``self.stability_score_thresh``; each filter is skipped
+        when its threshold is not positive. Surviving masks are binarized with
+        ``MASK_LOGIT_THRESHOLD``.
+
+        Args:
+            prompts: Point prompts, each decoded as an independent single-point prompt.
+            img_embedding: Image embedding fed to the decoder as ``image_embeddings``.
+            decoder_session: ONNX session of the mask decoder.
+
+        Returns:
+            ``(masks, scores, bboxes)`` concatenated over all batches. If no mask survives the
+            filters, three empty arrays are returned instead of raising.
+        """
+        mask, mask_scores, mask_bbox = [], [], []
+        mask_prompt, has_mask_prompt = mask_encoder_preprocess()
+        for i in range(0, len(prompts), self.points_per_batch):
+            LOGGER.info(
+                f"Processing points {i}-{min(i + self.points_per_batch, len(prompts))} "
+                f"out of {len(prompts)}"
+            )
+            batch = [[p] for p in prompts[i : i + self.points_per_batch]]
+            prompt_batch, prompt_label = batch_prompt_encoder_preprocess(batch)
+            ort_inputs = {
+                "image_embeddings": img_embedding,
+                "point_coords": prompt_batch,
+                "point_labels": prompt_label,
+                "mask_input": mask_prompt,
+                "has_mask_input": has_mask_prompt,
+                "orig_im_size": np.array([self.window_size, self.window_size], dtype=np.float32),
+            }
+            pred_mask, pred_scores, _ = decoder_session.run(None, ort_inputs)
+
+            # Filter by the mask quality score provided by SAM
+            if self.pred_iou_thresh > 0:
+                keep_masks = (pred_scores > self.pred_iou_thresh).reshape(-1)
+                pred_mask = pred_mask[keep_masks]
+                pred_scores = pred_scores[keep_masks]
+
+            # Filter by Stability Score
+            if self.stability_score_thresh > 0:
+                stability_score = calculate_stability_score(
+                    pred_mask, MASK_LOGIT_THRESHOLD, self.stability_score_offset
+                )
+                keep_masks = (stability_score > self.stability_score_thresh).reshape(-1)
+                pred_mask = pred_mask[keep_masks]
+                pred_scores = pred_scores[keep_masks]
+
+            if pred_mask.shape[0] > 0:
+                # Binarize mask given logit threshold
+                pred_mask = pred_mask > MASK_LOGIT_THRESHOLD
+                mask.append(pred_mask)
+                mask_scores.append(pred_scores.reshape(-1))
+                mask_bbox.append(mask_to_bbox(pred_mask))
+
+        if not mask:
+            # Every candidate was filtered out (or there were no prompts). np.concatenate raises
+            # ValueError on an empty list, so hand back empty arrays that callers can still
+            # concatenate and test through shape[0].
+            # NOTE(review): assumes masks are (N, 1, window_size, window_size) and boxes are
+            # (N, 4) -- confirm against the decoder output and mask_to_bbox.
+            # The empty boxes use bool so that concatenating them with real boxes keeps the
+            # dtype of the real boxes.
+            return (
+                np.zeros((0, 1, self.window_size, self.window_size), dtype=bool),
+                np.zeros((0,), dtype=np.float32),
+                np.zeros((0, 4), dtype=bool),
+            )
+
+        mask = np.concatenate(mask, axis=0)
+        mask_scores = np.concatenate(mask_scores, axis=0)
+        mask_bbox = np.concatenate(mask_bbox, axis=0)
+        return mask, mask_scores, mask_bbox
+
+    def process_crop(
+        self,
+        chip_data: NDArray[Any],
+        crop_box: BBox,
+        layer_idx: int,
+        encoder_session: ort.InferenceSession,
+        decoder_session: ort.InferenceSession,
+    ) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]:
+        """Segment one crop of a chip and express its boxes in the chip frame.
+
+        Crops of layers above 0 are resized to ``self.window_size`` before encoding and their
+        masks and boxes are mapped back with ``uncrop_masks``. The point grid of layer ``k``
+        has ``int(points_per_side / crop_n_points_downscale_factor**k)`` points per side.
+
+        Returns:
+            ``(masks, scores, bboxes)`` for the crop, with boxes offset by the crop origin.
+            The arrays are empty when no mask survived filtering.
+        """
+        # Get crop and resize
+        x0, y0, x1, y1 = crop_box
+        cropped_im = chip_data[:, :, y0:y1, x0:x1]
+
+        if layer_idx > 0:  # Resize to chip size if not the first layer
+            cropped_im = cast(
+                torch.Tensor,
+                resize(torch.from_numpy(cropped_im), size=[self.window_size]),
+            ).numpy()
+
+        # Get crop embeddings
+        crop_img_embedding = extract_img_embeddings_from_chip(
+            cropped_im, self.img_preprocessing_operation, encoder_session
+        )
+
+        # Build point grid for crop
+        points_per_side_for_layer = int(
+            self.points_per_side / (self.crop_n_points_downscale_factor**layer_idx)
+        )
+        prompts = build_point_grid(points_per_side_for_layer, self.window_size)
+
+        # Build mask
+        mask, mask_scores, mask_bbox = self.point_grid_inference(
+            prompts, crop_img_embedding, decoder_session
+        )
+
+        if mask.shape[0] == 0:
+            # Nothing survived filtering for this crop, so there is nothing to remap.
+            return mask, mask_scores, mask_bbox
+
+        if layer_idx > 0:  # Resize mask to crop size if not the first layer
+            mask, mask_bbox = uncrop_masks(mask, mask_bbox, crop_box, self.window_size)
+
+        # Return to the original image frame
+        mask_bbox = translate_bbox(mask_bbox, x_offset=crop_box[0], y_offset=crop_box[1])
+
+        return mask, mask_scores, mask_bbox
+
+    def generate_masks_from_grid(
+        self,
+        dataloader: DataLoader[ChipDataType],
+        encoder_session: ort.InferenceSession,
+        decoder_session: ort.InferenceSession,
+    ) -> Tuple[List[str], List[NDArray[Any]], List[NDArray[Any]], List[ChipWindow]]:
+        """Segment every chip of the dataloader and write its masks to a temporary GeoTIFF.
+
+        Each chip is processed crop by crop (see ``generate_crop_boxes``); the masks of all
+        crops are concatenated and written to one file per chip inside ``self.tmp_dir``. Chips
+        without any surviving mask produce no file.
+
+        Returns:
+            Four parallel lists with one entry per written chip: file path, mask scores, mask
+            boxes (offset by the chip read window) and the window the chip was written to.
+        """
+        filepaths: List[str] = []
+        scores: List[NDArray[Any]] = []
+        boxes: List[NDArray[Any]] = []
+        chip_windows: List[ChipWindow] = []
+
+        file_id = gen_guid()
+        dataset = cast(ChipDataset, dataloader.dataset)
+
+        # Generate smaller crops within each chip (if n_crop_layers > 0)
+        crop_boxes, layer_idxs = generate_crop_boxes(
+            self.window_size, self.n_crop_layers, self.crop_overlap_ratio
+        )
+
+        for batch_idx, batch in enumerate(dataloader):
+            LOGGER.info(f"Processing batch {batch_idx + 1}/{len(dataloader)}")
+            chip_data, chip_mask, write_info_list = batch
+            # NOTE(review): only the first window/write-info of each batch is used, which
+            # presumably relies on a batch size of one chip -- confirm in get_chip_dataloader.
+            read_window = dataset.read_windows[batch_idx][0]
+
+            crop_masks, crop_scores, crop_bbox = [], [], []
+
+            # Generate masks for each crop within chip
+            for crop_idx, (crop_box, layer_idx) in enumerate(zip(crop_boxes, layer_idxs)):
+                LOGGER.info(
+                    f"Processing crop {crop_idx + 1}/{len(crop_boxes)} from layer idx {layer_idx}"
+                )
+                mask, mask_scores, mask_bbox = self.process_crop(
+                    chip_data, crop_box, layer_idx, encoder_session, decoder_session
+                )
+                crop_masks.append(mask)
+                crop_scores.append(mask_scores)
+                crop_bbox.append(mask_bbox)
+
+            crop_masks = np.concatenate(crop_masks, axis=0)
+            crop_scores = np.concatenate(crop_scores, axis=0)
+            crop_bbox = np.concatenate(crop_bbox, axis=0)
+
+            # Write chip to file
+            if crop_masks.shape[0] > 0:
+                # Translate crop_box in relation to input raster. Done only when there are
+                # boxes, since the result is unused for chips without masks.
+                crop_bbox = translate_bbox(
+                    crop_bbox, x_offset=read_window.col_off, y_offset=read_window.row_off
+                )
+
+                LOGGER.info(f"Writing masks to file {batch_idx + 1}/{len(dataloader)}")
+                filename = os.path.join(self.tmp_dir.name, f"{file_id}_{batch_idx}.tif")
+                meta = cast(Dict[str, Any], write_info_list[0]["meta"])
+                meta.update({**INT_COMPRESSION_KWARGS})
+
+                write_window = ChipWindow(
+                    int(read_window.col_off - dataset.offset.width),
+                    int(read_window.row_off - dataset.offset.height),
+                    int(read_window.width),
+                    int(read_window.height),
+                )
+
+                write_window_to_file(
+                    crop_masks.squeeze(axis=1),
+                    chip_mask.any(axis=(0, 1)),
+                    Window(*write_window),  # type: ignore
+                    filename,
+                    meta,
+                )
+                filepaths.append(filename)
+                scores.append(crop_scores)
+                boxes.append(crop_bbox)
+                chip_windows.append(write_window)
+            else:
+                LOGGER.info(f"No masks to write from batch {batch_idx + 1}/{len(dataloader)}")
+
+        return filepaths, scores, boxes, chip_windows
+
+    def __call__(self):
+        """Build the op callback that segments a raster into one ``SamMaskRaster`` per chip."""
+
+        def callback(
+            input_raster: Raster,
+        ) -> Dict[str, List[SamMaskRaster]]:
+            geometry = shpg.shape(input_raster.geometry)
+            dataloader = self.get_chip_dataloader(input_raster, geometry)
+
+            self.img_preprocessing_operation = build_chip_preprocessing_operation(
+                input_raster, self.band_names, self.band_scaling, self.band_offset
+            )
+
+            encoder_session, decoder_session = self.get_model()
+
+            chip_filepaths, mask_scores, mask_boxes, chip_windows = self.generate_masks_from_grid(
+                dataloader,
+                encoder_session,
+                decoder_session,
+            )
+
+            rasters: List[SamMaskRaster] = []
+            for path, scores, boxes, window in zip(
+                chip_filepaths, mask_scores, mask_boxes, chip_windows
+            ):
+                asset = AssetVibe(reference=path, type="image/tiff", id=gen_guid())
+                # One band per mask, named after its position within the chip.
+                segmented_chip = SamMaskRaster.clone_from(
+                    input_raster,
+                    id=gen_guid(),
+                    assets=[asset],
+                    bands={f"mask_{i}": i for i in range(scores.shape[0])},
+                    categories=["background", "foreground"],
+                    mask_score=scores.tolist(),
+                    mask_bbox=boxes.tolist(),
+                    chip_window=window,
+                )
+                rasters.append(segmented_chip)
+
+            return {"segmented_chips": rasters}
+
+        return callback
diff --git a/ops/segment_anything/test_sam_inference.py b/ops/segment_anything/test_sam_inference.py
new file mode 100644
index 00000000..a4bdd1a4
--- /dev/null
+++ b/ops/segment_anything/test_sam_inference.py
@@ -0,0 +1,471 @@
+import os
+from datetime import datetime
+from tempfile import TemporaryDirectory
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import pytest
+import rioxarray as rio
+import xarray as xr
+from shapely import geometry as shpg
+
+from vibe_core.data import AssetVibe, GeometryCollection
+from vibe_core.data.core_types import gen_guid
+from vibe_core.data.rasters import CategoricalRaster, Raster, SamMaskRaster
+from vibe_core.data.sentinel import Sentinel2Raster
+from vibe_dev.testing.op_tester import OpTester
+from vibe_lib.raster import save_raster_to_asset
+
+CONFIG_PATH_PROMPT = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "prompt_segmentation.yaml"
+)
+
+CONFIG_PATH_AUTOSEG = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "automatic_segmentation.yaml"
+)
+
+DEFAULT_AUTOSEG_PARAMETERS = {
+    "points_per_side": 2,
+    "spatial_overlap": 0.0,
+    "pred_iou_thresh": 0.88,
+    "stability_score_thresh": 0.95,
+    "n_crop_layers": 0,
+    "crop_overlap_ratio": 0.0,
+    "crop_n_points_downscale_factor": 1,
+}
+
+# Minimum threshold just to make sure the threshold won't remove any masks
+MIN_THRESHOLD = 0.00001
+BAND_NAMES = {"s2": ["R", "G", "B"], "basemap": ["red", "green", "blue"]}
+
+
+def edit_autoseg_parameters(key: str, value: Any) -> Dict[str, Any]:
+    """Return a copy of the default auto-segmentation parameters with ``key`` set to ``value``.
+
+    ``value`` is not restricted to numbers: the invalid-parameter test also passes lists
+    (band names, scaling and offset).
+    """
+    new_params: Dict[str, Any] = DEFAULT_AUTOSEG_PARAMETERS.copy()
+    new_params[key] = value
+    return new_params
+
+
+def create_base_raster(
+    tmp_dir_name: str,
+    raster_size: int = 2048,
+    type: str = "s2",
+    cells_per_side: int = 2,
+) -> Union[Sentinel2Raster, Raster]:
+    """Create a square checkerboard raster saved as an asset in ``tmp_dir_name``.
+
+    Args:
+        tmp_dir_name: Directory where the raster asset is written.
+        raster_size: Side of the raster, in pixels.
+        type: ``"s2"`` builds a 12-band ``Sentinel2Raster``; anything else builds a 3-band
+            ``Raster``. In the latter case the value must still be a key of ``BAND_NAMES``.
+        cells_per_side: Number of checkerboard cells along each side.
+    """
+    now = datetime.now()
+    geom = shpg.mapping(shpg.box(0, 0, raster_size, raster_size))
+
+    n_channels = 12 if type == "s2" else 3
+    raster_dim = (n_channels, raster_size, raster_size)  # enough for two chips/side
+
+    # Create a checkerboard pattern
+    cell_size = raster_size // cells_per_side
+    row, col = np.indices((raster_size, raster_size))
+    pattern_2d = (row // cell_size % 2) ^ (col // cell_size % 2)
+    fake_data = 10000.0 * np.repeat(pattern_2d[np.newaxis, :, :], n_channels, axis=0)
+
+    fake_da = xr.DataArray(
+        fake_data,
+        coords={
+            "bands": np.arange(raster_dim[0]),
+            # dims are (bands, y, x): y follows axis 1 and x follows axis 2
+            "y": np.linspace(0, 1, raster_dim[1]),
+            "x": np.linspace(0, 1, raster_dim[2]),
+        },
+        dims=["bands", "y", "x"],
+    )
+    fake_da.rio.write_crs("epsg:4326", inplace=True)
+
+    asset = save_raster_to_asset(fake_da, tmp_dir_name)
+
+    # The first three bands get their RGB names, the remaining ones are named by index
+    bands = {
+        **{b: idx for idx, b in enumerate(BAND_NAMES[type])},
+        **{str(idx): idx for idx in range(3, raster_dim[0])},
+    }
+
+    if type == "s2":
+        raster = Sentinel2Raster(
+            id="s2",
+            time_range=(now, now),
+            geometry=geom,
+            assets=[asset],
+            bands=bands,
+            tile_id="",
+            processing_level="",
+            product_name="",
+            orbit_number=0,
+            relative_orbit_number=0,
+            orbit_direction="",
+            platform="",
+            extra_info={},
+        )
+    else:
+        raster = Raster(
+            id="basemap",
+            time_range=(now, now),
+            geometry=geom,
+            assets=[asset],
+            bands=bands,
+        )
+
+    return raster
+
+
+def create_geometry_collection(
+    prompt_list: List[Union[shpg.Point, shpg.Polygon]],
+    label: List[int],
+    prompt_id: List[int],
+    geom: Dict[str, Any],
+    time_range: Tuple[datetime, datetime],
+    tmp_dir_name: str,
+    column_names: Optional[List[str]] = None,
+):
+    """Write the prompts to a GeoJSON file and wrap it in a ``GeometryCollection``.
+
+    ``column_names`` names, in order, the columns holding ``prompt_list``, ``label`` and
+    ``prompt_id``. It defaults to ``["geometry", "label", "prompt_id"]``.
+    """
+    if column_names is None:  # avoid sharing one mutable default list across calls
+        column_names = ["geometry", "label", "prompt_id"]
+    df = pd.DataFrame(dict(zip(column_names, [prompt_list, label, prompt_id])))
+    gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")  # type: ignore
+    path = os.path.join(tmp_dir_name, "fake_gdf.geojson")
+    gdf.to_file(path, driver="GeoJSON")
+
+    asset = AssetVibe(reference=path, type="application/json", id=gen_guid())
+    geom_collection = GeometryCollection(
+        id=gen_guid(), geometry=geom, time_range=time_range, assets=[asset]
+    )
+    return geom_collection
+
+
+@pytest.fixture
+def tmp_dir():
+    """Yield the path of a temporary directory that is removed after the test."""
+    with TemporaryDirectory() as tmp_dir_name:
+        yield tmp_dir_name
+
+
+@pytest.mark.parametrize(
+    "prompt_list, label, prompt_id, expected_exception",
+    [
+        (
+            [shpg.MultiPoint([[1, 1], [2, 2]])],
+            [1],
+            [0],
+            "Expected each geometry to be a shapely Point or Polygon",
+        ),
+        (
+            [shpg.Point(4000, 4000)],  # outside of the raster
+            [1],
+            [0],
+            "Expected all prompts to be contained within the ROI of input_geometry",
+        ),
+        ([shpg.Point(1, 1)], [1], [5.5], "Expected prompt_ids as integers or strings"),
+        (
+            [shpg.Point(1, 1), shpg.Point(2, 2)],
+            ["a", 5.5],
+            [0, 1],
+            "Expected labels to be integers, with 0 or 1 values",
+        ),
+        (
+            [shpg.box(1, 1, 2, 2), shpg.box(2, 2, 3, 3)],
+            [1, 1],
+            [0, 0],
+            "Expected at most one bounding box per prompt",
+        ),
+    ],
+)
+def test_invalid_prompt_format(
+    prompt_list: List[Union[shpg.Point, shpg.Polygon]],
+    label: List[int],
+    prompt_id: List[int],
+    expected_exception: Optional[str],
+    tmp_dir: str,
+):
+    """Malformed prompts must make the prompt segmentation op raise ``ValueError``."""
+    raster = create_base_raster(tmp_dir)
+    geom_collection = create_geometry_collection(
+        prompt_list=prompt_list,
+        label=label,
+        prompt_id=prompt_id,
+        geom=raster.geometry,
+        time_range=raster.time_range,
+        tmp_dir_name=tmp_dir,
+    )
+
+    with pytest.raises(ValueError, match=expected_exception):
+        OpTester(CONFIG_PATH_PROMPT).run(input_raster=raster, input_prompts=geom_collection)
+
+
+def test_invalid_geometry_collection(tmp_dir: str):
+    """A prompt file with an unexpected column name must be rejected with ``ValueError``."""
+    raster = create_base_raster(tmp_dir)
+    geom_collection = create_geometry_collection(
+        prompt_list=[shpg.Point(5, 5)],
+        label=[1],
+        prompt_id=[0],
+        geom=raster.geometry,
+        time_range=raster.time_range,
+        tmp_dir_name=tmp_dir,
+        column_names=["geometry", "label", "wrong_column_name"],
+    )
+
+    with pytest.raises(ValueError):
+        OpTester(CONFIG_PATH_PROMPT).run(input_raster=raster, input_prompts=geom_collection)
+
+
+# Points expressed as fraction of the raster size for easier conversion to pixel coordinates
+@pytest.mark.parametrize(
+    "raster_type, raster_size, spatial_overlap, prompt_list, label, prompt_id, expected_mask_area",
+    [
+        (  # One point per quadrant as separate prompts
+            "s2",
+            2048,
+            0.0,
+            [
+                shpg.Point(0.25, 0.25),  # top-left quadrant
+                shpg.Point(0.75, 0.25),  # top-right quadrant
+                shpg.Point(0.25, 0.75),  # bottom-left quadrant
+                shpg.Point(0.75, 0.75),  # bottom-right quadrant
+            ],
+            [1, 1, 1, 1],
+            [0, 1, 2, 3],
+            1024 * 1024,  # one quadrant, 1/4 of the raster area
+        ),
+        (  # One prompt with 2 points on the top-left and bottom-right quadrants
+            "basemap",
+            2048,
+            0.0,
+            [
+                shpg.Point(0.25, 0.25),
+                shpg.Point(0.75, 0.25),
+                shpg.Point(0.25, 0.75),
+                shpg.Point(0.75, 0.75),
+            ],
+            [1, 0, 0, 1],
+            [0, 0, 0, 0],
+            2 * 1024 * 1024,  # two quadrants, 1/2 of the raster area
+        ),
+        (  # Four points per quadrant, each quadrant as separate prompt
+            "s2",
+            2048,
+            0.0,
+            [shpg.Point(0.125 + i * 0.25, 0.125 + j * 0.25) for i in range(4) for j in range(4)],
+            [1] * 16,
+            [2 * (i // 2) + (j // 2) for i in range(4) for j in range(4)],
+            1024 * 1024,  # one quadrant, 1/4 of the raster area
+        ),
+        (  # Four points per quadrant, single prompt (top-left, bottom-right), no overlap
+            "basemap",
+            2048,
+            0.0,
+            [shpg.Point(0.125 + i * 0.25, 0.125 + j * 0.25) for i in range(4) for j in range(4)],
+            [1, 1, 0, 0] * 2 + [0, 0, 1, 1] * 2,
+            [1] * 16,
+            2 * 1024 * 1024,  # two quadrants, 1/2 of the raster area
+        ),
+        (  # Bbox with half the side of a quadrant, centered in the first quadrant, no overlap
+            "s2",
+            2048,
+            0.0,
+            [shpg.box(0.125, 0.125, 0.375, 0.375)],
+            [1],
+            [0],
+            512 * 512,  # a quarter of a quadrant, 1/16 of the raster area
+        ),
+        (  # Same Bbox as above with a centered foreground point, single prompt, no overlap
+            "basemap",
+            2048,
+            0.0,
+            [shpg.box(0.125, 0.125, 0.375, 0.375), shpg.Point(0.25, 0.25)],
+            [1, 1],
+            [0, 0],
+            512 * 512,  # a quarter of a quadrant, 1/16 of the raster area
+        ),
+    ],
+)
+def test_segmentation_mask(
+    raster_type: str,
+    raster_size: int,
+    spatial_overlap: float,
+    prompt_list: List[Union[shpg.Point, shpg.Polygon]],
+    label: List[int],
+    prompt_id: List[int],
+    expected_mask_area: int,
+    tmp_dir: str,
+):
+    """Each prompt must yield one mask band whose area is within 5% of the expected area."""
+    raster = create_base_raster(tmp_dir, raster_size, raster_type)
+    geom_collection = create_geometry_collection(
+        prompt_list=prompt_list,
+        label=label,
+        prompt_id=prompt_id,
+        geom=raster.geometry,
+        time_range=raster.time_range,
+        tmp_dir_name=tmp_dir,
+    )
+
+    op_tester = OpTester(CONFIG_PATH_PROMPT)
+    op_tester.update_parameters(
+        {"spatial_overlap": spatial_overlap, "band_names": BAND_NAMES[raster_type]}
+    )
+    output = op_tester.run(input_raster=raster, input_prompts=geom_collection)
+
+    assert "segmentation_mask" in output
+
+    n_prompts = len(np.unique(prompt_id))
+    mask_raster = cast(CategoricalRaster, output["segmentation_mask"])
+    assert len(mask_raster.bands) == n_prompts
+
+    mask = rio.open_rasterio(mask_raster.assets[0].path_or_url).values  # type: ignore
+    # The output must match the size of the raster that was actually created
+    assert mask.shape == (n_prompts, raster_size, raster_size)
+
+    for idx in range(n_prompts):
+        assert (
+            np.abs(
+                np.sum(mask[idx, :, :]) - expected_mask_area  # type: ignore
+            )
+            <= 0.05 * expected_mask_area
+        ), "Mask area is not within 5 percent of the expected area"
+
+
+@pytest.mark.parametrize(
+    "raster_type, checkboard_cells_per_side, points_per_side, spatial_overlap, "
+    "pred_iou_thresh, stability_score_thresh, n_crop_layers, n_expected_masks",
+    [
+        (
+            "s2",
+            2,
+            2,
+            DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"],
+            MIN_THRESHOLD,
+            MIN_THRESHOLD,
+            DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"],
+            16,
+        ),  # 2x2 raster, 4 chips, 4 masks/chip (2pps**2) = 16 masks
+        (
+            "basemap",
+            2,
+            2,
+            DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"],
+            MIN_THRESHOLD,
+            MIN_THRESHOLD,
+            1,
+            80,  # 16 masks for crop layer 0 + 4*16 for the next layer
+        ),  # Same as above, but with an additional crop layer
+        (
+            "s2",
+            2,
+            2,
+            0.5,
+            MIN_THRESHOLD,
+            MIN_THRESHOLD,
+            DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"],
+            36,  # With the minimum thresholds nothing is filtered, so we expect all 36 masks
+        ),  # 2x2 raster, 9 chips (due to overlap), 4 masks/chip (2pps**2) = 36 masks
+        (
+            "basemap",
+            2,
+            2,
+            0.5,
+            DEFAULT_AUTOSEG_PARAMETERS["pred_iou_thresh"],
+            DEFAULT_AUTOSEG_PARAMETERS["stability_score_thresh"],
+            DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"],
+            31,  # SAM removes a few due to low quality and stability scores
+        ),  # Same as above, but with filtered masks
+        (
+            "s2",
+            4,
+            4,
+            DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"],
+            MIN_THRESHOLD,
+            MIN_THRESHOLD,
+            DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"],
+            64,  # Without the IoU quality and stability score filtering, we expect all 64 masks
+        ),  # 4x4 raster, 4 chips, 16 masks/chip (4pps**2) = 64 masks
+        (
+            "basemap",
+            4,
+            4,
+            DEFAULT_AUTOSEG_PARAMETERS["spatial_overlap"],
+            DEFAULT_AUTOSEG_PARAMETERS["pred_iou_thresh"],
+            DEFAULT_AUTOSEG_PARAMETERS["stability_score_thresh"],
+            DEFAULT_AUTOSEG_PARAMETERS["n_crop_layers"],
+            36,  # SAM removes a few due to low quality and stability scores
+        ),  # Same as above, but with filtered masks
+    ],
+)
+def test_automatic_segmentation_mask(
+    raster_type: str,
+    checkboard_cells_per_side: int,
+    points_per_side: int,
+    spatial_overlap: float,
+    pred_iou_thresh: float,
+    stability_score_thresh: float,
+    n_crop_layers: int,
+    n_expected_masks: int,
+    tmp_dir: str,
+):
+    """Automatic segmentation must output one raster per chip and the expected mask count."""
+    raster_size = 2048
+    chip_size = 1024
+    raster = create_base_raster(tmp_dir, raster_size, raster_type, checkboard_cells_per_side)
+
+    op_tester = OpTester(CONFIG_PATH_AUTOSEG)
+    op_tester.update_parameters(
+        {
+            "points_per_side": points_per_side,
+            "spatial_overlap": spatial_overlap,
+            "n_crop_layers": n_crop_layers,
+            "pred_iou_thresh": pred_iou_thresh,
+            "stability_score_thresh": stability_score_thresh,
+            "band_names": BAND_NAMES[raster_type],
+        }
+    )
+    output = op_tester.run(input_raster=raster)
+
+    assert "segmented_chips" in output
+
+    segmented_chips = cast(List[SamMaskRaster], output["segmented_chips"])
+    step_size = chip_size * (1 - spatial_overlap)
+    # Chips per side squared; rounded so the count is compared (and reported) as an integer
+    n_expected_rasters = round((1 + (raster_size - chip_size) / step_size) ** 2)
+    assert len(segmented_chips) == n_expected_rasters, (
+        "Unexpected number of output rasters. "
+        f"Got {len(segmented_chips)}, expected {n_expected_rasters}."
+    )
+
+    n_masks = 0
+    for chip in segmented_chips:
+        mask = cast(xr.Dataset, rio.open_rasterio(chip.assets[0].path_or_url)).values
+        n_masks += mask.shape[0]  # one band per mask
+
+    assert (
+        n_masks == n_expected_masks
+    ), f"Unexpected number of output masks. Got {n_masks}, expected {n_expected_masks}."
+
+
+@pytest.mark.parametrize(
+    "param_key, invalid_value",
+    [
+        ("points_per_side", 0),
+        ("points_per_side", 1.5),
+        ("n_crop_layers", -1),
+        ("n_crop_layers", 1.5),
+        ("crop_overlap_ratio", -1),
+        ("crop_overlap_ratio", 1.5),
+        ("crop_n_points_downscale_factor", 0),
+        ("crop_n_points_downscale_factor", 1.5),
+        ("pred_iou_thresh", 0),
+        ("pred_iou_thresh", 1),
+        ("stability_score_thresh", 0),
+        ("stability_score_thresh", 1.5),
+        ("band_names", ["Cyan", "Magenta", "Yellow"]),
+        ("band_names", ["R", "G", "B", "N", "N2"]),
+        ("band_names", ["R", "G"]),
+        ("band_scaling", [1.0, 1.0]),
+        ("band_offset", [1.0, 1.0]),
+    ],
+)
+def test_invalid_autoseg_params(
+    param_key: str,
+    invalid_value: Any,
+    tmp_dir: str,
+):
+    """Every invalid parameter value must make the automatic segmentation op raise."""
+    raster = create_base_raster(tmp_dir)
+    op_tester = OpTester(CONFIG_PATH_AUTOSEG)
+    op_tester.update_parameters(edit_autoseg_parameters(param_key, invalid_value))
+    with pytest.raises(ValueError):
+        op_tester.run(input_raster=raster)
diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.py b/ops/segment_anything_combine_masks/combine_sam_masks.py
new file mode 100644
index 00000000..e041023f
--- /dev/null
+++ b/ops/segment_anything_combine_masks/combine_sam_masks.py
@@ -0,0 +1,161 @@
+import os
+from tempfile import TemporaryDirectory
+from typing import Dict, List, Tuple
+
+import numpy as np
+import rasterio
+import torch
+from torchvision.ops.boxes import batched_nms, box_area
+
+from vibe_core.data import AssetVibe, BBox, CategoricalRaster, ChipWindow, SamMaskRaster, gen_guid
+
+
+def touch_chip_boundaries(bbox: BBox, chip_window: ChipWindow) -> bool:
+    """Return True when any side of ``bbox`` reaches or crosses the edge of ``chip_window``."""
+    return (
+        bbox[0] <= chip_window.col_offset
+        or bbox[1] <= chip_window.row_offset
+        or bbox[2] >= chip_window.col_offset + chip_window.width
+        or bbox[3] >= chip_window.row_offset + chip_window.height
+    )
+
+
+def is_contained_by_others(current_bbox: BBox, other_boxes: List[BBox], eps: int = 5) -> bool:
+    """Return True when ``current_bbox`` lies inside at least one of ``other_boxes``.
+
+    Each candidate box is grown by ``eps`` on every side before the containment check.
+    """
+    return any(
+        current_bbox[0] >= bbox[0] - eps
+        and current_bbox[1] >= bbox[1] - eps
+        and current_bbox[2] <= bbox[2] + eps
+        and current_bbox[3] <= bbox[3] + eps
+        for bbox in other_boxes
+    )
+
+
+# - ☑️ Filter masks that touch crop boundaries, but do not touch chip boundaries
+# - ❌ NMS of all masks within a crop. I don't think this makes much sense anymore
+# - ☑️ NMS for all crops within a chip
+# - ❓ Remove small disconnected regions and holes in a mask, then NMS again
+# - ☑️ NMS masks from different chips
+def select_masks(
+    boxes: List[List[BBox]],
+    scores: List[List[float]],
+    chip_windows: List[ChipWindow],
+    chip_nms_thr: float,
+    mask_nms_thr: float,
+) -> List[List[int]]:
+    """Select, for every chip, the indices of the masks that are not duplicates.
+
+    Three filters are applied in sequence:
+
+    1. NMS inside each chip, ranked by the SAM prediction score (``chip_nms_thr``).
+    2. NMS over the survivors of all chips, ranked by inverse box area so that smaller
+       masks win (``mask_nms_thr``).
+    3. Removal of masks that both touch their chip boundary and are contained in a mask
+       kept for another chip.
+
+    Args:
+        boxes: Mask boxes, one list per chip.
+        scores: Mask scores, parallel to ``boxes``.
+        chip_windows: Window of each chip, parallel to ``boxes``.
+        chip_nms_thr: Box IoU threshold of the per-chip NMS.
+        mask_nms_thr: Box IoU threshold of the cross-chip NMS.
+
+    Returns:
+        One list of kept mask indices per chip, indexing into the chip's entry of ``boxes``.
+    """
+    # NMS within each chip (using SAM prediction scores)
+    kept_idx = []
+    for chip_boxes, chip_scores in zip(boxes, scores):
+        keep_by_nms = batched_nms(
+            boxes=torch.from_numpy(np.array(chip_boxes)).to(torch.float32),
+            scores=torch.from_numpy(np.array(chip_scores)).to(torch.float32),
+            idxs=torch.zeros(len(chip_boxes)),
+            iou_threshold=chip_nms_thr,
+        )
+        kept_idx.append(keep_by_nms.numpy().tolist())
+
+    # NMS across chips (preferring smaller masks)
+    # idx_map[k] = (chip index, position within kept_idx[chip]) of the k-th row of kept_boxes
+    idx_map = [
+        (cidx, idx) for cidx, chip_idxs in enumerate(kept_idx) for idx in range(len(chip_idxs))
+    ]
+
+    kept_boxes = np.array(
+        [
+            boxes[chip_idx][to_keep_idx]
+            for chip_idx in range(len(kept_idx))
+            for to_keep_idx in kept_idx[chip_idx]
+        ]
+    )
+
+    # As in SAM, prefer smaller masks
+    area_scores = 1 / box_area(torch.from_numpy(kept_boxes))
+
+    keep_by_nms = batched_nms(
+        boxes=torch.from_numpy(kept_boxes),
+        scores=area_scores,
+        idxs=torch.zeros(kept_boxes.shape[0]),
+        iou_threshold=mask_nms_thr,
+    )
+
+    idx_map = [idx_map[idx] for idx in keep_by_nms.numpy().tolist()]
+    filtered_mask_idxs = [[] for _ in range(len(boxes))]
+    for cidx, idx in idx_map:
+        filtered_mask_idxs[cidx].append(kept_idx[cidx][idx])
+
+    # Removing masks that touch their chip boundary and are contained within other masks
+    mask_idx_to_keep = [[] for _ in range(len(boxes))]
+    for chip_idx, mask_idxs in enumerate(filtered_mask_idxs):
+        if mask_idxs:
+            other_boxes = [
+                boxes[cidx][idx]
+                for cidx in range(len(boxes))
+                for idx in filtered_mask_idxs[cidx]
+                if cidx != chip_idx
+            ]
+            for idx in mask_idxs:
+                if not (
+                    touch_chip_boundaries(boxes[chip_idx][idx], chip_windows[chip_idx])
+                    and is_contained_by_others(boxes[chip_idx][idx], other_boxes)
+                ):
+                    mask_idx_to_keep[chip_idx].append(idx)
+    return mask_idx_to_keep
+
+
+def merge_masks(
+    masks: List[SamMaskRaster], mask_idx_to_keep: List[List[int]], tmp_dir: str
+) -> Tuple[AssetVibe, int]:
+    """Copy the selected mask bands of every chip raster into a single GeoTIFF.
+
+    The output reuses the metadata of the first raster, with the band count set to the
+    number of selected masks. Bands are written in chip order.
+
+    Returns:
+        The asset of the merged file and the number of bands written.
+    """
+    n_masks = sum(len(idxs) for idxs in mask_idx_to_keep)
+    with rasterio.open(masks[0].assets[0].path_or_url) as src:
+        out_meta = src.meta
+    out_meta["count"] = n_masks
+
+    out_path = os.path.join(tmp_dir, f"{gen_guid()}.tif")
+    band_idx_to_write = 1  # rasterio band indices start at 1
+    with rasterio.open(out_path, "w", **out_meta) as dst:
+        for raster, idxs in zip(masks, mask_idx_to_keep):
+            if idxs:
+                with rasterio.open(raster.assets[0].path_or_url) as src:
+                    for i in idxs:
+                        dst.write(src.read(i + 1), band_idx_to_write)
+                        band_idx_to_write += 1
+
+    asset = AssetVibe(reference=out_path, type="image/tiff", id=gen_guid())
+    return asset, n_masks
+
+
+class CallbackBuilder:
+    """Build the op callback that deduplicates and merges per-chip SAM mask rasters."""
+
+    def __init__(self, chip_nms_thr: float, mask_nms_thr: float):
+        """Store the NMS thresholds.
+
+        Raises:
+            ValueError: If a threshold is not strictly between 0 and 1.
+        """
+        self.tmp_dir = TemporaryDirectory()
+
+        if chip_nms_thr <= 0 or chip_nms_thr >= 1:
+            raise ValueError(f"'chip_nms_thr' must be between 0 and 1. Got {chip_nms_thr}")
+        if mask_nms_thr <= 0 or mask_nms_thr >= 1:
+            raise ValueError(f"'mask_nms_thr' must be between 0 and 1. Got {mask_nms_thr}")
+
+        self.chip_nms_thr = chip_nms_thr
+        self.mask_nms_thr = mask_nms_thr
+
+    def __call__(self):
+        def callback(input_masks: List[SamMaskRaster]) -> Dict[str, CategoricalRaster]:
+            if not input_masks:
+                # Fail with a clear message instead of an obscure error further down
+                raise ValueError("Expected at least one SamMaskRaster in 'input_masks'")
+
+            mask_scores = [m.mask_score for m in input_masks]
+            mask_bboxes = [m.mask_bbox for m in input_masks]
+            chip_windows = [m.chip_window for m in input_masks]
+
+            mask_idx_to_keep = select_masks(
+                mask_bboxes, mask_scores, chip_windows, self.chip_nms_thr, self.mask_nms_thr
+            )
+
+            asset, n_masks = merge_masks(input_masks, mask_idx_to_keep, self.tmp_dir.name)
+            segmentation_mask = CategoricalRaster.clone_from(
+                input_masks[0],
+                id=gen_guid(),
+                assets=[asset],
+                bands={f"mask_{i}": i for i in range(n_masks)},
+                categories=["background", "foreground"],
+            )
+            return {"output_mask": segmentation_mask}
+
+        return callback
+
+    def __del__(self):
+        self.tmp_dir.cleanup()
diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.yaml b/ops/segment_anything_combine_masks/combine_sam_masks.yaml
new file mode 100644
index 00000000..bd491aaf
--- /dev/null
+++ b/ops/segment_anything_combine_masks/combine_sam_masks.yaml
@@ -0,0 +1,23 @@
+name: combine_sam_masks
+inputs:
+  input_masks: List[SamMaskRaster]
+output:
+  output_mask: CategoricalRaster
+parameters:
+  chip_nms_thr: 0.7
+  mask_nms_thr: 0.5
+entrypoint:
+  file: combine_sam_masks.py
+  callback_builder: CallbackBuilder
+dependencies:
+  parameters:
+    - chip_nms_thr
+    - mask_nms_thr
+description:
+  short_description:
+    Process intermediary segmentation masks, filtering out duplicates and combining into final mask raster.
+  parameters:
+    chip_nms_thr:
+      The box IoU cutoff used by non-maximal suppression to filter duplicate masks within a chip.
+    mask_nms_thr:
+      The box IoU cutoff used by non-maximal suppression to filter duplicate masks between different chips.
\ No newline at end of file
diff --git a/ops/segment_anything_combine_masks/test_combine_sam_masks.py b/ops/segment_anything_combine_masks/test_combine_sam_masks.py
new file mode 100644
index 00000000..a3d26354
--- /dev/null
+++ b/ops/segment_anything_combine_masks/test_combine_sam_masks.py
@@ -0,0 +1,135 @@
+import os
+from datetime import datetime
+from tempfile import TemporaryDirectory
+from typing import List, Tuple, Union, cast
+
+import numpy as np
+import pytest
+import xarray as xr
+from shapely import geometry as shpg
+
+from vibe_core.data.core_types import ChipWindow, gen_guid
+from vibe_core.data.rasters import CategoricalRaster, SamMaskRaster
+from vibe_dev.testing.op_tester import OpTester
+from vibe_lib.raster import save_raster_to_asset
+
+CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "combine_sam_masks.yaml")
+
+DEFAULT_BBOXES = [
+    (0, 0, 1024, 1024),
+    (1024, 0, 2048, 1024),
+    (0, 1024, 1024, 2048),
+    (1024, 1024, 2048, 2048),
+]
+
+
+def create_segmented_raster(
+    tmp_dir_name: str,
+    mask_bbox: Tuple[int, int, int, int],
+    mask_score: float = 1.0,
+    raster_size: int = 2048,
+) -> SamMaskRaster:
+    """Create a single-mask ``SamMaskRaster`` whose foreground fills ``mask_bbox``.
+
+    ``mask_bbox`` is ``(x0, y0, x1, y1)`` in pixels. The chip window covers the whole raster.
+    """
+    now = datetime.now()
+    geom = shpg.mapping(shpg.box(0, 0, raster_size, raster_size))
+
+    raster_dim = (1, raster_size, raster_size)
+
+    fake_data = np.zeros(raster_dim, dtype=np.uint8)
+    fake_data[0, mask_bbox[1] : mask_bbox[3], mask_bbox[0] : mask_bbox[2]] = 1
+
+    fake_da = xr.DataArray(
+        fake_data,
+        coords={
+            "bands": np.arange(raster_dim[0]),
+            # dims are (bands, y, x): y follows axis 1 and x follows axis 2
+            "y": np.linspace(0, 1, raster_dim[1]),
+            "x": np.linspace(0, 1, raster_dim[2]),
+        },
+        dims=["bands", "y", "x"],
+    )
+    fake_da.rio.write_crs("epsg:4326", inplace=True)
+
+    asset = save_raster_to_asset(fake_da, tmp_dir_name)
+
+    return SamMaskRaster(
+        id=gen_guid(),
+        time_range=(now, now),
+        geometry=geom,
+        assets=[asset],
+        bands={"mask": 0},
+        categories=["background", "foreground"],
+        mask_score=[mask_score],
+        mask_bbox=[tuple([float(c) for c in mask_bbox])],  # type: ignore
+        chip_window=ChipWindow(0.0, 0.0, float(raster_size), float(raster_size)),
+    )
+
+
+@pytest.fixture
+def tmp_dir():
+    """Yield the path of a temporary directory that is removed after the test."""
+    with TemporaryDirectory() as tmp_dir_name:
+        yield tmp_dir_name
+
+
+@pytest.mark.parametrize(
+    "param_key, invalid_value",
+    [(p, v) for p in ["chip_nms_thr", "mask_nms_thr"] for v in [-1, 0, 1, 1.5]],
+)
+def test_invalid_params(
+    param_key: str,
+    invalid_value: Union[int, float],
+    tmp_dir: str,
+):
+    """Thresholds outside the open interval (0, 1) must be rejected with ``ValueError``."""
+    raster = create_segmented_raster(tmp_dir, mask_bbox=(0, 0, 1024, 1024))
+    op_tester = OpTester(CONFIG_PATH)
+    op_tester.update_parameters({param_key: invalid_value})
+    with pytest.raises(ValueError):
+        op_tester.run(input_masks=[raster])
+
+
+# Bounding boxes are expressed in pixel coordinates of the 2048x2048 test raster
+@pytest.mark.parametrize(
+    "bbox_list, chip_nms_thr, mask_nms_thr, n_expected_masks",
+    [
+        (
+            DEFAULT_BBOXES,
+            0.7,
+            0.5,
+            4,  # No overlapping masks, so expect the same number
+        ),
+        (
+            DEFAULT_BBOXES + [(10, 10, 1014, 1014)],
+            0.7,
+            0.5,
+            4,  # One mask is completely contained in another
+        ),
+        (  # Overlapping with top two masks, but with an area slightly larger than a chip
+            DEFAULT_BBOXES + [(500, 0, 1550, 1024)],
+            0.7,
+            0.5,  # threshold of 0.5 IoU won't suppress the new box
+            5,  # Overlapping with two masks, but IoU won't pass the threshold so we will keep it
+        ),
+        (  # Overlapping with top two masks, but with an area slightly larger than a chip
+            DEFAULT_BBOXES + [(500, 0, 1550, 1024)],
+            0.7,
+            0.3,  # lowering the threshold so it will be suppressed (we prefer smaller masks)
+            4,
+        ),
+    ],
+)
+def test_segmentation_mask(
+    bbox_list: List[Tuple[int, int, int, int]],
+    chip_nms_thr: float,
+    mask_nms_thr: float,
+    n_expected_masks: int,
+    tmp_dir: str,
+):
+    """The combined raster must hold exactly the masks that survive deduplication."""
+    input_masks = [create_segmented_raster(tmp_dir, mask_bbox=box) for box in bbox_list]
+
+    op_tester = OpTester(CONFIG_PATH)
+    op_tester.update_parameters({"chip_nms_thr": chip_nms_thr, "mask_nms_thr": mask_nms_thr})
+    output = op_tester.run(input_masks=input_masks)  # type: ignore
+
+    assert "output_mask" in output
+
+    mask_raster = cast(CategoricalRaster, output["output_mask"])
+    assert len(mask_raster.bands) == n_expected_masks
diff --git a/ops/segment_driveway/segment_driveway.py b/ops/segment_driveway/segment_driveway.py
new file mode 100644
index 00000000..0444b4f0
--- /dev/null
+++ b/ops/segment_driveway/segment_driveway.py
@@ -0,0 +1,142 @@
+import os
+from tempfile import TemporaryDirectory
+from typing import Any, Callable, Dict, Tuple
+
+import numpy as np
+import onnxruntime as ort
+import rasterio
+import torch
+import torch.nn.functional as F
+from numpy.typing import NDArray
+from rasterio.enums import Resampling
+from rasterio.windows import Window
+from shapely import geometry as shpg
+
+from vibe_core.data import AssetVibe, gen_guid
+from vibe_core.data.rasters import CategoricalRaster, Raster
+from vibe_lib.raster import resample_raster
+from vibe_lib.spaceeye.chip import ChipDataset, Dims, get_loader, predict_chips
+
+
+def reader(
+    raster: Raster, window: Window, out_shape: Tuple[int, int]
+) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
+    """Read bands 4, 1 and 2 (in that order) of ``raster`` inside ``window``.
+
+    Returns:
+        The data with nodata pixels set to 0, and the boolean mask of those nodata pixels.
+    """
+    with rasterio.open(raster.raster_asset.url) as src:
+        x = src.read(window=window, out_shape=out_shape, indexes=[4, 1, 2])
+        mask = x == src.nodata
+        x[mask] = 0
+    return x, mask
+
+
+def contrast_enhance(img: NDArray[Any], low: float = 2, high: float = 98) -> NDArray[np.float32]:
+    """Stretch ``img`` so that its ``low``/``high`` percentiles map to 0 and 1.
+
+    Percentiles are computed over the last two axes, ignoring NaN, and the result is clipped
+    to ``[0, 1]``. Slices whose two percentiles coincide (e.g. a constant band) are mapped
+    to 0 instead of producing NaN or infinity from a division by zero.
+    """
+    img_min, img_max = np.nanpercentile(img, (low, high), axis=(-1, -2), keepdims=True)
+    # Use a unit range where the percentiles coincide, so the division below stays finite
+    value_range = np.where(img_max > img_min, img_max - img_min, 1)
+    return np.clip((img.astype(np.float32) - img_min) / value_range, 0, 1)
+
+
+def pre_process(size: Tuple[int, int]) -> Callable[[NDArray[Any], NDArray[Any]], NDArray[Any]]:
+    """
+    Build the chip preprocessing function: bilinear resize to `size`, then contrast enhancement
+    """
+
+    def fn(chip_data: NDArray[Any], _) -> NDArray[np.float32]:
+        x = F.interpolate(torch.from_numpy(chip_data), size=size, mode="bilinear").numpy()
+        x = contrast_enhance(x).astype(np.float32)
+        return x
+
+    return fn
+
+
+def post_process(
+    size: Tuple[int, int],
+) -> Callable[[NDArray[Any], NDArray[Any], NDArray[Any]], NDArray[Any]]:
+    """
+    Build the postprocessing function: bilinear resize of the model output to `size`, then
+    get most probable class (argmax over axis 1)
+    """
+
+    def fn(_, __: NDArray[Any], model_out: NDArray[Any]) -> NDArray[Any]:
+        x = F.interpolate(torch.from_numpy(model_out), size=size, mode="bilinear").numpy()
+        return x.argmax(axis=1, keepdims=True).astype(np.uint8)
+
+    return fn
+
+
+class CallbackBuilder:
+    """Build the op callback that segments driveways in a raster with an ONNX model."""
+
+    def __init__(
+        self,
+        downsampling: int,
+        root_dir: str,
+        model_path: str,
+        window_size: int,
+        model_size: int,
+        overlap: float,
+        batch_size: int,
+        num_workers: int,
+    ):
+        self.downsampling = downsampling
+        self.root_dir = root_dir
+        self.model_path = model_path
+        self.window_size = window_size
+        self.model_size = model_size
+        self.overlap = overlap
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.tmp_dir = TemporaryDirectory()
+
+    def __call__(self):
+        def callback(
+            input_raster: Raster,
+        ) -> Dict[str, CategoricalRaster]:
+            if self.downsampling < 1:
+                raise ValueError(
+                    f"Downsampling must be equal or larger than 1, found {self.downsampling}"
+                )
+            chip_size = self.window_size
+            step_size = int(chip_size * (1 - self.overlap))
+            if step_size < 1:
+                # A stride below one pixel cannot advance the sliding window
+                raise ValueError(
+                    f"Overlap of {self.overlap} leaves no stride for chips of size {chip_size}"
+                )
+            model_path = os.path.join(self.root_dir, self.model_path)
+            model = ort.InferenceSession(model_path)
+            dataset = ChipDataset(
+                [input_raster],
+                chip_size=Dims(chip_size, chip_size, 1),
+                step_size=Dims(step_size, step_size, 1),
+                downsampling=self.downsampling,
+                nodata=255,
+                geometry_or_chunk=shpg.shape(input_raster.geometry),
+                reader=reader,
+                dtype="uint8",
+            )
+
+            dataloader = get_loader(dataset, self.batch_size, self.num_workers)
+            # Chips are resized to the model input size and predictions resized back to chips
+            pred_filepaths = predict_chips(
+                model,
+                dataloader,
+                self.tmp_dir.name,
+                skip_nodata=False,
+                pre_process=pre_process((self.model_size, self.model_size)),
+                post_process=post_process((self.window_size, self.window_size)),
+            )
+            assert (
+                len(pred_filepaths) == 1
+            ), f"Expected one prediction file, found: {len(pred_filepaths)}"
+            mask_filepath = resample_raster(
+                pred_filepaths[0],
+                self.tmp_dir.name,
+                dataset.width,
+                dataset.height,
+                dataset.transform,
+                Resampling.nearest,
+                nodata=255,
+            )
+            asset = AssetVibe(reference=mask_filepath, type="image/tiff", id=gen_guid())
+            out = CategoricalRaster.clone_from(
+                input_raster,
+                id=gen_guid(),
+                assets=[asset],
+                categories=["Background", "Driveway", "Unknown"],
+            )
+
+            return {"segmentation_raster": out}
+
+        return callback
+
+    def __del__(self):
+        self.tmp_dir.cleanup()
diff --git a/ops/segment_driveway/segment_driveway.yaml b/ops/segment_driveway/segment_driveway.yaml
new file mode 100644
index 00000000..4c44d2e2
--- /dev/null
+++ b/ops/segment_driveway/segment_driveway.yaml
@@ -0,0 +1,25 @@
+name: segment_driveway
+inputs:
+  input_raster: Raster
+output:
+  segmentation_raster: CategoricalRaster
+parameters:
+  downsampling: 1
+  # NOTE(review): the repository ships this model under op_resources/driveways_models
+  # (plural); confirm that this directory name resolves inside the worker image.
+  root_dir: /opt/terravibes/ops/resources/driveway_models
+  model_path: driveway.onnx
+  window_size: 128
+  model_size: 512
+  overlap: .25
+  batch_size: 1
+  num_workers: 0
+entrypoint:
+  file: segment_driveway.py
+  callback_builder: CallbackBuilder
+dependencies:
+  parameters:
+    - model_path
+    - downsampling
+    - window_size
+    - overlap
+description:
+  short_description: Segments the front of houses in the input raster using a machine learning model.
\ No newline at end of file
diff --git a/ops/select_necessary_coverage_items/filter_items.py b/ops/select_necessary_coverage_items/filter_items.py
new file mode 100644
index 00000000..e11e5541
--- /dev/null
+++ b/ops/select_necessary_coverage_items/filter_items.py
@@ -0,0 +1,100 @@
+"""
+Selects a (locally?) minimum subset of items that covers the desired input geometry
+(if such a subset exists) for each timestamp.
+Discards items for a timestamp if the geometry cannot be covered at that time.
+Assumes items are sparse in time (time range is one moment in time)
+"""
+
+from collections import defaultdict
+from typing import Dict, List, Optional, TypeVar
+
+from shapely import geometry as shpg
+from shapely import ops as shpo
+from shapely.geometry.base import BaseGeometry
+
+from vibe_core.data import DataVibe
+from vibe_lib.geometry import is_approx_within, norm_intersection
+
+T = TypeVar("T", bound=DataVibe, covariant=True)
+
+
+def can_cover(geom: BaseGeometry, items: List[T], threshold: float) -> bool:
+    """Check whether `geom` is approximately within the union of the geometries of `items`"""
+    item_geoms = [shpg.shape(p.geometry) for p in items]
+    return is_approx_within(geom, shpo.unary_union(item_geoms), threshold)
+
+
+def intersect_area(x: DataVibe, geom: BaseGeometry) -> float:
+    """Area of the intersection between the geometry of `x` and `geom`"""
+    return shpg.shape(x.geometry).intersection(geom).area
+
+
+def filter_necessary_items(
+    geom: BaseGeometry, items: List[T], threshold: float, min_area: Optional[float] = None
+) -> List[T]:
+    """
+    Greedily filter the items so that only a subset necessary to cover all
+    the geometry's spatial extent is returned
+
+    At each step the item with the largest intersection with the still uncovered geometry is
+    picked, and the search continues on what remains after removing that item's geometry.
+    `min_area` is the uncovered area below which the search stops; it defaults to
+    `(1 - threshold)` times the area of the initial geometry.
+    """
+    if min_area is None:
+        min_area = (1 - threshold) * geom.area
+    if not items:  # No more items left, can't cover the geometry
+        return []
+    sorted_items = sorted(items, key=lambda x: intersect_area(x, geom), reverse=True)
+    # Get item with largest intersection
+    item = sorted_items[0]
+    item_geom = shpg.shape(item.geometry)
+    if is_approx_within(geom, item_geom, threshold):
+        return [item]
+    if norm_intersection(geom, item_geom) < (1 - threshold):
+        # Can't make more progress, so we give up
+        return []
+    remaining_geom = geom - item_geom
+    if remaining_geom.area < min_area:
+        # We covered enough of the area, so we stop now
+        return [item]
+    return [item] + filter_necessary_items(remaining_geom, sorted_items[1:], threshold, min_area)
+
+
+def callback_builder(
+    min_cover: float, within_threshold: float, max_items: Optional[int], group_attribute: str
+):
+    """
+    Build the op callback that keeps, for each group of items, only those needed for coverage
+
+    Items are grouped by their `group_attribute` value. A group is used only if at most
+    `max_items` of its items (the ones with largest intersection) cover the input geometry
+    according to `min_cover`; the greedy selection inside a group uses `within_threshold`.
+
+    Raises ValueError if a threshold is not strictly between 0 and 1, or if `min_cover` is
+    larger than `within_threshold`.
+    """
+    if not 0 < min_cover < 1:
+        raise ValueError(f"{min_cover=} must be between 0 and 1")
+    if not 0 < within_threshold < 1:
+        raise ValueError(f"{within_threshold=} must be between 0 and 1")
+    if min_cover > within_threshold:
+        raise ValueError(f"{min_cover=} cannot be larger than {within_threshold=}")
+
+    def filter_items(bounds_item: DataVibe, items: List[T]) -> Dict[str, T]:
+        input_geometry = shpg.shape(bounds_item.geometry)
+        grouped = defaultdict(list)
+        for p in items:
+            grouped[getattr(p, group_attribute)].append(p)
+        # Keep at most max_items per group, preferring the largest intersections
+        item_groups = [
+            sorted(item_group, key=lambda x: intersect_area(x, input_geometry), reverse=True)[
+                :max_items
+            ]
+            for item_group in grouped.values()
+        ]
+        filtered_items = {
+            item.id: item
+            for item_group in item_groups
+            if can_cover(
+                input_geometry,
+                item_group,
+                min_cover,
+            )
+            for item in filter_necessary_items(input_geometry, item_group, within_threshold)
+        }
+        if not filtered_items:
+            raise RuntimeError(f"No product group can cover input geometry {bounds_item.geometry}")
+        return filtered_items
+
+    def callback(bounds_items: List[DataVibe], items: List[T]) -> Dict[str, List[T]]:
+        # Keyed by item id, so an item selected for several bounds is returned once
+        filtered_items = {}
+        for bounds_item in bounds_items:
+            filtered_items.update(filter_items(bounds_item, items))
+
+        return {"filtered_items": list(filtered_items.values())}
+
+    return callback
diff --git a/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml b/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml
new file mode 100644
index 00000000..4f1fc03b
--- /dev/null
+++ b/ops/select_necessary_coverage_items/select_necessary_coverage_items.yaml
@@ -0,0 +1,45 @@
+name: select_necessary_coverage_items
+inputs:
+  bounds_items: List[DataVibe]
+  items: List[DataVibe]
+output:
+  # @INHERIT makes the corresponding port inherit the type of the port that feeds the
+  # input in parentheses.
+ filtered_items: "@INHERIT(items)" +entrypoint: + file: filter_items.py + callback_builder: callback_builder +parameters: + # Minimum amount of overlap to consider a group, should not be higher than within_threshold + # Between 0 and 1 + min_cover: .99 + # Intersection threshold to consider a geometry is within another + # Between 0 and 1 + within_threshold: .99 + max_items: null # null to consider all items + # Attribute used to group items + group_attribute: "time_range" +dependencies: + parameters: + - min_cover + - within_threshold + - max_items + - group_attribute +description: + short_description: Select items necessary to spatially cover the geometry of the bounds items. + long_description: + The op will group items according to the chosen attribute and then select the necessary items + from each group, provided the group is able to cover the bounds. Items are selected + independently for each of the bounds items and then deduplicated in the end. + inputs: + bounds_items: Items whose geometries should be covered. + items: Items that will be selected. + output: + filtered_items: Items necessary to cover the geometries. + parameters: + min_cover: Minimum amount of cover required for a group to be used. + within_threshold: + Threshold of relative intersection area for a geometry to be contained by another. + max_items: + Maximum number of items per group that can be used to cover a geometry. `None` for no limit. + group_attribute: Which attribute should be used to group the items. 
diff --git a/ops/select_necessary_coverage_items/test_filter.py b/ops/select_necessary_coverage_items/test_filter.py new file mode 100644 index 00000000..07bf5c2d --- /dev/null +++ b/ops/select_necessary_coverage_items/test_filter.py @@ -0,0 +1,49 @@ +import os +from datetime import datetime, timezone +from typing import List, cast + +from shapely import affinity as shpa +from shapely import geometry as shpg + +from vibe_core.data import DataVibe +from vibe_core.data.core_types import BaseVibe +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "select_necessary_coverage_items.yaml" +) + + +def test_op(): + bounds = shpg.Point(10, 10).buffer(5) + bounds = [bounds, shpa.translate(bounds, -6, 6)] + geom = shpg.Point(10, 10).buffer(10) + input_geoms = [ + shpa.translate(geom, -7, 0), + shpa.translate(geom, 8, 0), + shpa.translate(geom, 0, 8), + shpa.translate(geom, 5, 5), + ] + + start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) + bounds_vibe = [DataVibe("bounds", (start_date, end_date), shpg.mapping(b), []) for b in bounds] + input_vibe = [ + DataVibe(f"input{i}", (start_date, end_date), shpg.mapping(g), []) + for i, g in enumerate(input_geoms) + ] + inputs = [bounds_vibe[:1], bounds_vibe[1:2], bounds_vibe] + expected_out = [input_vibe[:2], [input_vibe[0], input_vibe[2]], input_vibe[:3]] + + for inp, out in zip(inputs, expected_out): + output_vibe = OpTester(CONFIG_PATH).run( + bounds_items=cast(BaseVibe, inp), items=cast(List[BaseVibe], input_vibe) + ) + + # Get op result + output_name = "filtered_items" + assert output_name in output_vibe + items = output_vibe[output_name] + assert isinstance(items, list) + assert len(items) == len(out) + assert items == out diff --git a/ops/select_sequence/select_sequence.py b/ops/select_sequence/select_sequence.py new file mode 100644 index 00000000..3517f2b6 --- 
/dev/null +++ b/ops/select_sequence/select_sequence.py @@ -0,0 +1,54 @@ +from functools import partial +from typing import Dict, List, Union + +import numpy as np +from shapely.geometry import mapping + +from vibe_core.data import Raster, RasterSequence +from vibe_core.data.core_types import gen_guid + + +def callback( + rasters: Union[RasterSequence, List[Raster]], num: int, criterion: str +) -> Dict[str, RasterSequence]: + if isinstance(rasters, RasterSequence): + rasters = [ + Raster.clone_from( + rasters, + gen_guid(), + assets=[i], + geometry=mapping(rasters.asset_geometry[i.id]), + time_range=rasters.asset_time_range[i.id], + ) + for i in rasters.get_ordered_assets() + ] + + if len(rasters) < num: + raise ValueError( + f"The raster sequence has fewer entries ({len(rasters)}) than requested ({num})" + ) + + if criterion == "first": + idxs = np.arange(num) + elif criterion == "last": + idxs = np.arange(len(rasters) - num, len(rasters)) + elif criterion == "regular": + idxs = np.round(np.linspace(0, len(rasters) - 1, num)).astype(int) + else: + raise ValueError( + f"Invalid selection criterion {criterion}. " + f"Valid criteria are 'first', 'last' and 'regular'" + ) + + selected_rasters = [rasters[i] for i in idxs] + + res = RasterSequence.clone_from(rasters[0], f"select_{criterion}_{gen_guid()}", []) + + for r in selected_rasters: + res.add_item(r) + + return {"sequence": res} + + +def callback_builder(num: int, criterion: str): + return partial(callback, num=num, criterion=criterion) diff --git a/ops/select_sequence/select_sequence.yaml b/ops/select_sequence/select_sequence.yaml new file mode 100644 index 00000000..7743af66 --- /dev/null +++ b/ops/select_sequence/select_sequence.yaml @@ -0,0 +1,21 @@ +# This op selects "num" entries from a Raster sequence so that the output sequence has a fixed length. 
+# It can be used to guarantee that the number of elements in a sequence down in the workflow is +# honored +name: select_sequence +inputs: + rasters: RasterSequence +output: + sequence: RasterSequence +parameters: + num: 2 + # criterion is used to select which rasters in the input will be used in the output + # it can be "regular" for regularly spaced selection, "first" to select the "num" first + # rasters, or "last" to select the "num" last rasters + criterion: first +entrypoint: + file: select_sequence.py + callback_builder: callback_builder +dependencies: + parameters: + - num + - criterion \ No newline at end of file diff --git a/ops/select_sequence/select_sequence_from_list.yaml b/ops/select_sequence/select_sequence_from_list.yaml new file mode 100644 index 00000000..a11f1ed9 --- /dev/null +++ b/ops/select_sequence/select_sequence_from_list.yaml @@ -0,0 +1,26 @@ +# This op selects "num" entries from a Raster list so that the output sequence has a fixed length. +# It can be used to guarantee that the number of elements in a sequence down in the workflow is +# honored +name: select_sequence_from_list +inputs: + rasters: List[Raster] +output: + sequence: RasterSequence +parameters: + num: 37 + criterion: regular +entrypoint: + file: select_sequence.py + callback_builder: callback_builder +dependencies: + parameters: + - num + - criterion +description: + short_description: Selects "num" entries from a Raster list so that the output sequence has a fixed length. + parameters: + num: Number of rasters to select among sequence. + criterion: + Used to select which rasters in the input will be used in the output. + It can be "regular" for regularly spaced selection, "first" to select + the "num" first rasters, or "last" to select the "num" last rasters. 
\ No newline at end of file diff --git a/ops/split_sequence/split_sequence.py b/ops/split_sequence/split_sequence.py new file mode 100644 index 00000000..df38ec73 --- /dev/null +++ b/ops/split_sequence/split_sequence.py @@ -0,0 +1,25 @@ +from typing import Dict, List + +from vibe_core.data.core_types import gen_guid +from vibe_core.data.sentinel import ListTileData, Sequence2Tile, TileSequenceData + + +def callback_builder(): + """Op that splits a list of multiple TileSequence back to a list of Rasters""" + + def split_sequences( + sequences: List[TileSequenceData], + ) -> Dict[str, ListTileData]: + rasters = [ + Sequence2Tile[type(sequence)].clone_from( + sequence, + id=gen_guid(), + assets=[asset], + time_range=sequence.asset_time_range[asset.id], + ) + for sequence in sequences + for asset in sequence.get_ordered_assets() + ] + return {"rasters": rasters} + + return split_sequences diff --git a/ops/split_sequence/split_spaceeye_sequence.yaml b/ops/split_sequence/split_spaceeye_sequence.yaml new file mode 100644 index 00000000..37f1af9f --- /dev/null +++ b/ops/split_sequence/split_spaceeye_sequence.yaml @@ -0,0 +1,11 @@ +name: split_spaceeye_sequence +inputs: + sequences: List[SpaceEyeRasterSequence] +output: + rasters: List[SpaceEyeRaster] +parameters: +entrypoint: + file: split_sequence.py + callback_builder: callback_builder +description: + short_description: Splits a list of multiple TileSequence back to a list of Rasters. 
\ No newline at end of file diff --git a/ops/split_sequence/test_split_sequence.py b/ops/split_sequence/test_split_sequence.py new file mode 100644 index 00000000..7828d73a --- /dev/null +++ b/ops/split_sequence/test_split_sequence.py @@ -0,0 +1,36 @@ +import os +from datetime import datetime, timezone +from typing import Any, Dict + +from shapely import geometry as shpg + +from vibe_core.data.sentinel import SpaceEyeRasterSequence +from vibe_dev.testing.op_tester import OpTester + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "split_spaceeye_sequence.yaml" +) + + +def test_split_empty_sequence(): + polygon: Dict[str, Any] = shpg.mapping(shpg.box(0, 0, 1, 1)) # type: ignore + start_date = datetime(year=2021, month=7, day=10, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=7, day=28, tzinfo=timezone.utc) + seq = SpaceEyeRasterSequence( + id="s1", + time_range=(start_date, end_date), + geometry=polygon, + assets=[], + product_name="", + orbit_number=0, + relative_orbit_number=0, + orbit_direction="", + platform="", + extra_info={}, + tile_id="", + processing_level="", + bands={}, + write_time_range=(start_date, end_date), + ) + out = OpTester(CONFIG_PATH).run(sequences=[seq]) + assert not out["rasters"] diff --git a/ops/stack_landsat/stack_landsat.py b/ops/stack_landsat/stack_landsat.py new file mode 100644 index 00000000..fdbf0981 --- /dev/null +++ b/ops/stack_landsat/stack_landsat.py @@ -0,0 +1,86 @@ +from tempfile import TemporaryDirectory +from typing import Dict, Tuple + +import numpy as np +import rioxarray as rio +import xarray as xr + +from vibe_core.data import AssetVibe, LandsatProduct, gen_hash_id +from vibe_core.data.rasters import LandsatRaster +from vibe_lib.raster import save_raster_to_asset + +LANDSAT_SPYNDEX: Dict[str, str] = { + "blue": "B", + "green": "G", + "red": "R", + "nir08": "N", + "swir16": "S1", + "swir22": "S2", +} + + +def stack_landsat( + input: LandsatProduct, + tmp_folder: str, + qa_mask: int, 
+) -> Tuple[AssetVibe, Dict[str, int]]: + bands2stack = list(input.asset_map.keys()) + band_filepaths = [input.get_downloaded_band(band).path_or_url for band in bands2stack] + + band_idx = {k: v for v, k in enumerate(bands2stack)} + band_idx["nir"] = band_idx["nir08"] + # Add band aliases for spyndex + for k in LANDSAT_SPYNDEX.keys(): + band_idx[LANDSAT_SPYNDEX[k]] = band_idx[k] + + da = ( + xr.open_mfdataset(band_filepaths, engine="rasterio", combine="nested", concat_dim="bands") + .to_array() + .squeeze() + ) + + if qa_mask: + try: + qa_pixel = ( + rio.open_rasterio(input.get_downloaded_band("qa_pixel").path_or_url) + .squeeze() # type: ignore + .values.astype(int) + ) + mask = np.bitwise_and(qa_pixel, qa_mask) + del qa_pixel + da = da.where(mask) + except Exception as e: + raise ValueError(f"qa_pixel not found {e}") + + asset = save_raster_to_asset(da, tmp_folder) + return asset, band_idx + + +class CallbackBuilder: + def __init__(self, qa_mask_value: int): + self.tmp_dir = TemporaryDirectory() + self.qa_mask = qa_mask_value + + def __call__(self): + def process_landsat( + landsat_product: LandsatProduct, + ) -> Dict[str, LandsatRaster]: + img_asset, band_idx = stack_landsat(landsat_product, self.tmp_dir.name, self.qa_mask) + + bands = LandsatRaster.clone_from( + landsat_product, + id=gen_hash_id( + f"{landsat_product.tile_id}_stacked_landsat", + landsat_product.geometry, + landsat_product.time_range, + ), + assets=[img_asset], + bands=band_idx, + ) + + return {"landsat_raster": bands} + + return process_landsat + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/stack_landsat/stack_landsat.yaml b/ops/stack_landsat/stack_landsat.yaml new file mode 100644 index 00000000..dfeed763 --- /dev/null +++ b/ops/stack_landsat/stack_landsat.yaml @@ -0,0 +1,15 @@ +name: stack_landsat_bands +inputs: + landsat_product: LandsatProduct +output: + landsat_raster: LandsatRaster +parameters: + qa_mask_value: 64 +dependencies: + parameters: + - qa_mask_value 
+entrypoint: + file: stack_landsat.py + callback_builder: CallbackBuilder +description: + short_description: Stacks downloaded bands into a single raster. \ No newline at end of file diff --git a/ops/stack_sentinel2_bands/stack_sentinel2_bands.py b/ops/stack_sentinel2_bands/stack_sentinel2_bands.py new file mode 100644 index 00000000..0ca054bb --- /dev/null +++ b/ops/stack_sentinel2_bands/stack_sentinel2_bands.py @@ -0,0 +1,169 @@ +import logging +import mimetypes +import os +from tempfile import TemporaryDirectory +from typing import Dict, List, Sequence, Tuple, Union + +import geopandas as gpd +import numpy as np +from rasterio.features import rasterize +from rasterio.vrt import WarpedVRT +from rasterio.warp import Resampling + +from vibe_core.data import ( + AssetVibe, + DownloadedSentinel2Product, + Sentinel2CloudMask, + Sentinel2Raster, + gen_guid, +) +from vibe_lib.raster import INT_COMPRESSION_KWARGS, open_raster_from_ref + +BAND_ORDER: List[str] = [ + "B01", + "B02", + "B03", + "B04", + "B05", + "B06", + "B07", + "B08", + "B8A", + "B09", + "B10", + "B11", + "B12", +] + +CLOUD_CATEGORIES = ["NO-CLOUD", "OPAQUE", "CIRRUS", "OTHER"] +LOGGER = logging.getLogger(__name__) + + +def save_stacked_raster(band_filepaths: Sequence[str], ref_filepath: str, out_path: str) -> None: + """ + Save raster by stacking all bands. 
+ Reprojects all bands to match the reference band file provided + """ + with open_raster_from_ref(ref_filepath) as src: + meta = src.meta + out_meta = meta.copy() + out_meta.update( + { + "count": len(band_filepaths), + "driver": "GTiff", + "nodata": 0, + **INT_COMPRESSION_KWARGS, + } + ) + + vrt_options = { + "resampling": Resampling.bilinear, + "crs": meta["crs"], + "transform": meta["transform"], + "height": meta["height"], + "width": meta["width"], + } + + with open_raster_from_ref(out_path, "w", **out_meta) as dst: + for i, path in enumerate(band_filepaths): + with open_raster_from_ref(path) as src: + with WarpedVRT(src, **vrt_options) as vrt: + data = vrt.read(1) + dst.write(data, i + 1) + + +def rasterize_clouds(item: DownloadedSentinel2Product, ref_file: str, out_path: str) -> None: + """ + Rasterize cloud shapes and save compressed tiff file. + """ + with open_raster_from_ref(ref_file) as src: + meta = src.meta + meta.update({"nodata": 100, "driver": "GTiff", "dtype": "uint8", **INT_COMPRESSION_KWARGS}) + out = np.zeros((meta["height"], meta["width"])) + try: + gml_path = item.get_downloaded_cloudmask().path_or_url + df = gpd.read_file(gml_path, WRITE_GFS="NO") + cloud_map = { + "OPAQUE": CLOUD_CATEGORIES.index("OPAQUE"), + "CIRRUS": CLOUD_CATEGORIES.index("CIRRUS"), + } + values = ( + df["maskType"].map(cloud_map).fillna(CLOUD_CATEGORIES.index("OTHER")) # type: ignore + ) + rasterize( + ((g, v) for g, v in zip(df["geometry"], values)), # type: ignore + out=out, + transform=meta["transform"], + ) + except ValueError: + # Empty file means no clouds + LOGGER.debug( + "ValueError when opening cloud GML file. 
Assuming there are no clouds and ignoring.", + exc_info=True, + ) + pass + except KeyError: + LOGGER.warning(f"No cloudmask available on downloaded product {item.product_name}") + with open_raster_from_ref(out_path, "w", **meta) as dst: + dst.write(out, 1) + + +def process_s2( + item: DownloadedSentinel2Product, output_file_name: str, tmp_folder: str +) -> Tuple[str, str, List[str]]: + output_img_path = os.path.join(tmp_folder, output_file_name) + output_cloud_path = os.path.join(tmp_folder, "cloudmask.tif") + + # Make sure bands are in order + valid_bands = [b for b in BAND_ORDER if b in item.asset_map] + band_filepaths = [item.get_downloaded_band(b).path_or_url for b in valid_bands] + ref_filepath = band_filepaths[valid_bands.index("B02")] # B02 position among the bands present + save_stacked_raster(band_filepaths, ref_filepath, output_img_path) + + # Generate cloud mask + rasterize_clouds(item, ref_filepath, output_cloud_path) + + return output_img_path, output_cloud_path, valid_bands + + +class CallbackBuilder: + def __init__(self): + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def process_sentinel_2( + input_item: DownloadedSentinel2Product, + ) -> Dict[str, Union[Sentinel2Raster, Sentinel2CloudMask]]: + ref_name: str = input_item.product_name + output_file_name = ref_name + ".tif" + tmp_dir = os.path.join(self.tmp_dir.name, ref_name) + os.makedirs(tmp_dir) + + img, cloud, valid_bands = process_s2(input_item, output_file_name, tmp_dir) + + img_asset = AssetVibe(reference=img, type=mimetypes.types_map[".tif"], id=gen_guid()) + cloud_asset = AssetVibe( + reference=cloud, type=mimetypes.types_map[".tif"], id=gen_guid() + ) + + bands = Sentinel2Raster.clone_from( + input_item, + bands={name: idx for idx, name in enumerate(valid_bands)}, + id=ref_name, + assets=[img_asset], + ) + + cloud = Sentinel2CloudMask.clone_from( + input_item, + bands={"cloud": 0}, + categories=CLOUD_CATEGORIES, + id=ref_name, + assets=[cloud_asset], + ) + + return {"sentinel2_raster": bands, 
"sentinel2_cloud_mask": cloud} + + return process_sentinel_2 + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml b/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml new file mode 100644 index 00000000..44827892 --- /dev/null +++ b/ops/stack_sentinel2_bands/stack_sentinel2_bands.yaml @@ -0,0 +1,14 @@ +name: stack_sentinel2_bands +inputs: + input_item: DownloadedSentinel2Product +output: + sentinel2_raster: Sentinel2Raster + sentinel2_cloud_mask: Sentinel2CloudMask +parameters: +entrypoint: + file: stack_sentinel2_bands.py + callback_builder: CallbackBuilder +description: + short_description: + Creates a raster with bands stacked in the correct order and + a cloud mask raster with the rasterized cloud shapes. \ No newline at end of file diff --git a/ops/summarize_raster/raster_summary.py b/ops/summarize_raster/raster_summary.py new file mode 100644 index 00000000..c029614b --- /dev/null +++ b/ops/summarize_raster/raster_summary.py @@ -0,0 +1,66 @@ +import os +from tempfile import TemporaryDirectory +from typing import Any, Dict, Optional + +import pandas as pd +from shapely import geometry as shpg + +from vibe_core.data import DataSummaryStatistics, DataVibe, Raster, gen_guid +from vibe_core.data.core_types import AssetVibe +from vibe_lib.raster import load_raster_from_url + + +def summarize_raster( + raster: Raster, mask: Optional[Raster], geometry: Dict[str, Any] +) -> Dict[str, float]: + geom = shpg.shape(geometry).intersection(shpg.shape(raster.geometry)) + data_ar = load_raster_from_url(raster.raster_asset.url, geometry=geom, geometry_crs="epsg:4326") + data_ma = data_ar.to_masked_array() + if mask is not None: + mask_ma = load_raster_from_url( + mask.raster_asset.url, + crs=data_ar.rio.crs, + geometry=geom, + geometry_crs="epsg:4326", + ).to_masked_array() + # Update mask; the comparison is parenthesized because `&` binds tighter than `>` + data_ma.mask = data_ma.mask | ((mask_ma.data > 0) & ~mask_ma.mask) + masked_ratio = mask_ma.mean() + else: + masked_ratio = 0.0 
+ return { + "mean": data_ma.mean(), + "std": data_ma.std(), + "min": data_ma.min(), + "max": data_ma.max(), + "masked_ratio": masked_ratio, + } + + +class CallbackBuilder: + def __init__(self): + self.tmp_dir = TemporaryDirectory() + + def __call__(self): + def callback( + raster: Raster, input_geometry: DataVibe, mask: Optional[Raster] = None + ) -> Dict[str, DataSummaryStatistics]: + geom = input_geometry.geometry + stats = summarize_raster(raster, mask, geom) + guid = gen_guid() + filepath = os.path.join(self.tmp_dir.name, f"{guid}.csv") + pd.DataFrame(stats, index=pd.Index([raster.time_range[0]], name="date")).to_csv( + filepath + ) + summary = DataSummaryStatistics.clone_from( + raster, + geometry=geom, + id=gen_guid(), + assets=[AssetVibe(reference=filepath, type="text/csv", id=guid)], + ) + return {"summary": summary} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/summarize_raster/summarize_masked_raster.yaml b/ops/summarize_raster/summarize_masked_raster.yaml new file mode 100644 index 00000000..eec19835 --- /dev/null +++ b/ops/summarize_raster/summarize_masked_raster.yaml @@ -0,0 +1,15 @@ +name: summarize_masked_raster +inputs: + raster: Raster + mask: Raster + input_geometry: DataVibe +output: + summary: DataSummaryStatistics +parameters: +entrypoint: + file: raster_summary.py + callback_builder: CallbackBuilder +description: + short_description: + Computes the mean, standard deviation, maximum, and minimum values + across non-masked regions of the raster. 
\ No newline at end of file diff --git a/ops/summarize_raster/summarize_raster.yaml b/ops/summarize_raster/summarize_raster.yaml new file mode 100644 index 00000000..be220e94 --- /dev/null +++ b/ops/summarize_raster/summarize_raster.yaml @@ -0,0 +1,13 @@ +name: summarize_raster +inputs: + raster: Raster + input_geometry: DataVibe +output: + summary: DataSummaryStatistics +parameters: +entrypoint: + file: raster_summary.py + callback_builder: CallbackBuilder +description: + short_description: + Computes the mean, standard deviation, maximum, and minimum values across the whole raster. \ No newline at end of file diff --git a/ops/threshold_raster/threshold_raster.py b/ops/threshold_raster/threshold_raster.py new file mode 100644 index 00000000..9e6e2f35 --- /dev/null +++ b/ops/threshold_raster/threshold_raster.py @@ -0,0 +1,35 @@ +from tempfile import TemporaryDirectory +from typing import Dict, Optional, cast + +import numpy as np + +from vibe_core.data import Raster +from vibe_lib.raster import MaskedArrayType, load_raster, save_raster_from_ref + + +class CallbackBuilder: + def __init__(self, threshold: Optional[float]): + self.tmp_dir = TemporaryDirectory() + if threshold is None: + raise ValueError( + "Threshold must not be None. " + "Did you forget to overwrite the value on the workflow definition?" 
+ ) + self.threshold = threshold + + def __call__(self): + def callback(raster: Raster) -> Dict[str, Raster]: + data_ar = load_raster(raster) + # Make a mess to keep the mask intact + data_ma = data_ar.to_masked_array() + thr_ma = cast(MaskedArrayType, (data_ma > self.threshold).astype("float32")) + thr_ar = data_ar.copy(data=thr_ma.filled(np.nan)) + # Save it as uint8 instead of the original dtype + thr_ar.rio.update_encoding({"dtype": "uint8"}, inplace=True) + thr_raster = save_raster_from_ref(thr_ar, self.tmp_dir.name, raster) + return {"thresholded": thr_raster} + + return callback + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/threshold_raster/threshold_raster.yaml b/ops/threshold_raster/threshold_raster.yaml new file mode 100644 index 00000000..411df772 --- /dev/null +++ b/ops/threshold_raster/threshold_raster.yaml @@ -0,0 +1,15 @@ +name: threshold_raster +inputs: + raster: Raster +output: + thresholded: Raster +parameters: + threshold: null +entrypoint: + file: threshold_raster.py + callback_builder: CallbackBuilder +dependencies: + parameters: + - threshold +description: + short_description: Thresholds values of the input raster if higher than the threshold parameter. 
\ No newline at end of file diff --git a/ops/tile_sentinel1/tile_sentinel1.py b/ops/tile_sentinel1/tile_sentinel1.py new file mode 100644 index 00000000..60f00a54 --- /dev/null +++ b/ops/tile_sentinel1/tile_sentinel1.py @@ -0,0 +1,91 @@ +import hashlib +import logging +from tempfile import TemporaryDirectory +from typing import Dict, List, Union, cast, overload + +import fiona +import geopandas as gpd +from shapely import geometry as shpg +from shapely.geometry.base import BaseGeometry + +from vibe_core.data import ( + DownloadedSentinel1Product, + Sentinel1Raster, + Sentinel2Product, + TiledSentinel1Product, +) + +LOGGER = logging.getLogger(__name__) +S1List = Union[List[DownloadedSentinel1Product], List[Sentinel1Raster]] +TiledList = Union[List[TiledSentinel1Product], List[Sentinel1Raster]] +KML_DRIVER_NAMES = "kml KML libkml LIBKML".split() + + +@overload +def prepare_items( + s1_products: List[DownloadedSentinel1Product], tiles_df: gpd.GeoDataFrame +) -> List[TiledSentinel1Product]: ... + + +@overload +def prepare_items( + s1_products: List[Sentinel1Raster], tiles_df: gpd.GeoDataFrame +) -> List[Sentinel1Raster]: ... 
+ + +def prepare_items( + s1_products: S1List, + tiles_df: gpd.GeoDataFrame, +) -> TiledList: + processing_items = [] + for s1_item in s1_products: + s1_geom = shpg.shape(s1_item.geometry) + intersecting_df = cast(gpd.GeoDataFrame, tiles_df[tiles_df.intersects(s1_geom)]) + for _, intersecting_tile in intersecting_df.iterrows(): + geom = cast(BaseGeometry, intersecting_tile["geometry"]).buffer(0) + tile_id = cast(str, intersecting_tile["Name"]) + id = hashlib.sha256((s1_item.id + tile_id).encode()).hexdigest() + out_type = ( + TiledSentinel1Product + if isinstance(s1_item, DownloadedSentinel1Product) + else Sentinel1Raster + ) + tiled_s1 = out_type.clone_from( + s1_item, + id=id, + assets=s1_item.assets, + geometry=shpg.mapping(geom), + tile_id=tile_id, + ) + processing_items.append(tiled_s1) + return processing_items + + +class CallbackBuilder: + def __init__(self, tile_geometry: str): + self.tmp_dir = TemporaryDirectory() + self.tile_geometry = tile_geometry + + def __call__(self): + def preprocess_items( + sentinel1_products: S1List, + sentinel2_products: List[Sentinel2Product], + ) -> Dict[str, TiledList]: + tile_ids = set(p.tile_id for p in sentinel2_products) + # Make fiona read the file: https://gis.stackexchange.com/questions/114066/ + for driver in KML_DRIVER_NAMES: + fiona.drvsupport.supported_drivers[driver] = "rw" # type: ignore + + df = gpd.read_file(self.tile_geometry) + # Filter only tiles for which we have products + df = cast(gpd.GeoDataFrame, df[df["Name"].isin(tile_ids)]) # type: ignore + + # Prepare items for preprocessing with the s1 item, target geometry and tile id + processing_items = prepare_items(sentinel1_products, df) + + return {"tiled_products": processing_items} + + return preprocess_items + + def __del__(self): + self.tmp_dir.cleanup() diff --git a/ops/tile_sentinel1/tile_sentinel1.yaml b/ops/tile_sentinel1/tile_sentinel1.yaml new file mode 100644 index 00000000..d13d84df --- /dev/null +++ b/ops/tile_sentinel1/tile_sentinel1.yaml @@ 
-0,0 +1,23 @@ +name: tile_sentinel1 +inputs: + sentinel1_products: List[DownloadedSentinel1Product] + sentinel2_products: List[Sentinel2Product] +output: + tiled_products: List[TiledSentinel1Product] +parameters: + tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml +entrypoint: + file: tile_sentinel1.py + callback_builder: CallbackBuilder +description: + short_description: Match Sentinel-1 products that intersect with Sentinel-2 tiles. + long_description: + The op will generate an item for each Sentinel-1 product x Sentinel-2 tile combination if both + intersect. This op only handles metadata, no asset is changed. + inputs: + sentinel1_products: Sentinel-1 products that will be tiled. + sentinel2_products: Sentinel-2 products from which the tiles will be extracted. + output: + tiled_products: Sentinel-1 products with the added tiling metadata. + parameters: + tile_geometry: Path to the resource containing Sentinel-2 tile geometries. diff --git a/ops/tile_sentinel1/tile_sentinel1_rtc.yaml b/ops/tile_sentinel1/tile_sentinel1_rtc.yaml new file mode 100644 index 00000000..3e93f9a4 --- /dev/null +++ b/ops/tile_sentinel1/tile_sentinel1_rtc.yaml @@ -0,0 +1,23 @@ +name: tile_sentinel1 +inputs: + sentinel1_products: List[Sentinel1Raster] + sentinel2_products: List[Sentinel2Product] +output: + tiled_products: List[Sentinel1Raster] +parameters: + tile_geometry: /opt/terravibes/ops/resources/sentinel_tile_geometry/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml +entrypoint: + file: tile_sentinel1.py + callback_builder: CallbackBuilder +description: + short_description: Match Sentinel-1 products that intersect with Sentinel-2 tiles. + long_description: + The op will generate an item for each Sentinel-1 product x Sentinel-2 tile combination if both + intersect. This op only handles metadata, no asset is changed. 
+ inputs: + sentinel1_products: Sentinel-1 products that will be tiled. + sentinel2_products: Sentinel-2 products from which the tiles will be extracted. + output: + tiled_products: Sentinel-1 products with the added tiling metadata. + parameters: + tile_geometry: Path to the resource containing Sentinel-2 tile geometries. diff --git a/ops/unpack_refs/unpack_refs.py b/ops/unpack_refs/unpack_refs.py new file mode 100644 index 00000000..00875d6c --- /dev/null +++ b/ops/unpack_refs/unpack_refs.py @@ -0,0 +1,18 @@ +from typing import Dict, List + +from vibe_core.data import ExternalReference, ExternalReferenceList, gen_guid + + +def callback_builder(): + def callback( + input_refs: List[ExternalReferenceList], + ) -> Dict[str, List[ExternalReference]]: + return { + "ref_list": [ + ExternalReference.clone_from(refs, id=gen_guid(), url=url, assets=[]) + for refs in input_refs + for url in refs.urls + ] + } + + return callback diff --git a/ops/unpack_refs/unpack_refs.yaml b/ops/unpack_refs/unpack_refs.yaml new file mode 100644 index 00000000..f2ad548e --- /dev/null +++ b/ops/unpack_refs/unpack_refs.yaml @@ -0,0 +1,11 @@ +name: unpack_refs +inputs: + input_refs: List[ExternalReferenceList] +output: + ref_list: List[ExternalReference] +parameters: +entrypoint: + file: unpack_refs.py + callback_builder: callback_builder +description: + short_description: Unpacks the urls from the list of external references.
\ No newline at end of file diff --git a/ops/weed_detection/weed_detection.py b/ops/weed_detection/weed_detection.py new file mode 100644 index 00000000..f686c4db --- /dev/null +++ b/ops/weed_detection/weed_detection.py @@ -0,0 +1,220 @@ +import os +from dataclasses import dataclass +from enum import auto +from tempfile import TemporaryDirectory +from typing import Any, Dict, List, Tuple, Union + +import geopandas as gpd +import numpy as np +import rasterio +from numpy.typing import NDArray +from rasterio.crs import CRS +from rasterio.features import geometry_mask, shapes, sieve +from rasterio.mask import mask +from rasterio.transform import Affine +from shapely import geometry as shpg +from sklearn.mixture import GaussianMixture +from strenum import StrEnum + +from vibe_core.data import DataVibe +from vibe_core.data.core_types import AssetVibe, gen_guid +from vibe_core.data.rasters import Raster +from vibe_lib.archive import create_flat_archive + + +class SimplifyBy(StrEnum): + simplify = auto() + convex = auto() + none = auto() + + +@dataclass +class OpenedRaster: + """Load a raster for training and prediction + + Attributes: + pixels: 1D array of selected data points + shape: shape of the input raster + alpha_mask: boolean values indicating which pixels were selected from the input raster + transform: affine transform of the input raster + crs: coordinate reference system of the input raster + """ + + def __init__( + self, + raster: Raster, + buffer: int, + no_data: Union[int, None], + alpha_index: int, + bands: List[int], + ): + with rasterio.open(raster.raster_asset.url) as src: + projected_geo = ( + gpd.GeoSeries(shpg.shape(raster.geometry), crs="epsg:4326").to_crs(src.crs).iloc[0] + ) + + if no_data is None: + no_data = src.nodata + ar, self.tr = mask(src, [projected_geo], crop=True, nodata=no_data) + self.input_crs = src.crs + + self.buffer_mask = geometry_mask( + [projected_geo.buffer(buffer)], ar.shape[1:], self.tr, invert=True + ) + + # Create an alpha 
mask + if alpha_index >= 0: + self._alpha_mask = ar[alpha_index].astype(bool) + else: # no alpha band + self._alpha_mask = np.ones(ar.shape[1:], dtype=bool) + + if not bands: + bands = [i for i in range(ar.shape[0]) if i != alpha_index] + self.pixels = ar[bands] + + self.input_shape = ar.shape + + @property + def shape(self) -> Tuple[int]: + return self.input_shape + + @property + def crs(self) -> CRS: + return self.input_crs + + @property + def transform(self) -> Affine: + return self.tr + + @property + def training_data(self) -> NDArray[Any]: + mask = self.buffer_mask & self.alpha_mask + return self.pixels[:, mask] + + @property + def prediction_data(self) -> NDArray[Any]: + return self.pixels[:, self.alpha_mask] + + @property + def alpha_mask(self) -> NDArray[Any]: + return self._alpha_mask + + +def train_model(open_raster: OpenedRaster, samples: int, clusters: int) -> GaussianMixture: + training_data = open_raster.training_data + idx = np.random.choice(training_data.shape[1], samples) + xy = training_data[:, idx].T + + gmm = GaussianMixture(n_components=clusters, covariance_type="full") + gmm.fit(xy) + + return gmm + + +def predict( + open_raster: OpenedRaster, + sieve_size: int, + clusters: int, + simplify: SimplifyBy, + tolerance: float, + model: GaussianMixture, + output_dir: str, +) -> AssetVibe: + prediction_data = open_raster.prediction_data + classes = model.predict(prediction_data.reshape(prediction_data.shape[0], -1).T) + result = np.zeros(open_raster.shape[1:], dtype=np.uint8) + result[open_raster.alpha_mask] = classes + result = sieve(result, sieve_size) + + file_num = 0 + for segment in range(clusters): + cluster = (result == segment).astype(np.uint8) + + df_shapes = gpd.GeoSeries( + [shpg.shape(s) for s, _ in shapes(cluster, mask=cluster, transform=open_raster.tr)], + crs=open_raster.crs, + ) # type: ignore + + if df_shapes.empty: + # Model could not converge with all requested clusters + continue + + cluster_path = os.path.join(output_dir, 
f"cluster{file_num}") + file_num += 1 + + if simplify == SimplifyBy.simplify: + df_shapes.simplify(tolerance).to_file(cluster_path) + elif simplify == SimplifyBy.convex: + df_shapes.convex_hull.to_file(cluster_path) + elif simplify == SimplifyBy.none: + df_shapes.to_file(cluster_path) # type: ignore + + # Create zip archive containing all output + archive_path = create_flat_archive(output_dir, "result") + return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid()) + + +class CallbackBuilder: + def __init__( + self, + buffer: int, + no_data: Union[int, None], + clusters: int, + sieve_size: int, + simplify: str, + tolerance: float, + samples: int, + bands: List[int], + alpha_index: int, + ): + self.temp_dir = TemporaryDirectory() + self.buffer = buffer + self.no_data = no_data + self.clusters = clusters + self.sieve_size = sieve_size + self.simplify = SimplifyBy(simplify.lower()) + self.tolerance = tolerance + self.samples = samples + self.bands = bands + self.alpha_index = alpha_index + + def __call__(self): + def detect_weeds( + raster: Raster, + ) -> Dict[str, DataVibe]: + open_raster = OpenedRaster( + raster=raster, + buffer=self.buffer, + no_data=self.no_data, + alpha_index=self.alpha_index, + bands=self.bands, + ) + + model = train_model( + open_raster=open_raster, + samples=self.samples, + clusters=self.clusters, + ) + + prediction = predict( + open_raster=open_raster, + sieve_size=self.sieve_size, + clusters=self.clusters, + simplify=self.simplify, + tolerance=self.tolerance, + model=model, + output_dir=self.temp_dir.name, + ) + + result = DataVibe( + id=gen_guid(), + time_range=raster.time_range, + geometry=raster.geometry, + assets=[prediction], + ) + return {"result": result} + + return detect_weeds + + def __del__(self): + self.temp_dir.cleanup() diff --git a/ops/weed_detection/weed_detection.yaml b/ops/weed_detection/weed_detection.yaml new file mode 100644 index 00000000..ea1f3cc7 --- /dev/null +++ 
b/ops/weed_detection/weed_detection.yaml @@ -0,0 +1,34 @@ +name: weed_detection +inputs: + raster: Raster +output: + result: DataVibe +parameters: + buffer: -50 + no_data: + clusters: 4 + sieve_size: 2000 + simplify: "simplify" + tolerance: 0.25 + samples: 100000 + bands: [] + alpha_index: -1 +entrypoint: + callback_builder: CallbackBuilder + file: weed_detection.py +dependencies: + parameters: + - buffer + - no_data + - clusters + - sieve_size + - simplify + - tolerance + - samples + - bands + - alpha_index +version: 2 +description: + short_description: + Trains a Gaussian Mixture Model (GMM), clusters all image pixels, and converts clustered + regions into polygons. \ No newline at end of file diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000..f60bffd4 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,11 @@ +{ + "ignore": [ + "ops/run_landcover_model", + ], + "reportMissingParameterType": "error", + "reportInvalidTypeVarUse": "error", + "reportMissingTypeArgument": "error", + "reportMissingImports": "warning", + "typeCheckingMode": "basic", + "useLibraryCodeForTypes": true, +} \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..44effbb2 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +addopts = -k 'not benchmark' +pythonpath = src +filterwarnings = ignore:.*fields may not start with an underscore.* diff --git a/resources/docker/Dockerfile-api_orchestrator b/resources/docker/Dockerfile-api_orchestrator new file mode 100644 index 00000000..fd4bb246 --- /dev/null +++ b/resources/docker/Dockerfile-api_orchestrator @@ -0,0 +1,18 @@ +# syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 + +COPY src /app/src +COPY workflows /app/workflows +COPY ops /app/ops + +RUN /opt/conda/bin/pip install /app/src/vibe_core && \ + /opt/conda/bin/pip install /app/src/vibe_common && \ + /opt/conda/bin/pip install /app/src/vibe_server + +RUN rm -rf
/app/src + +RUN find /app -type d -name __pycache__ | xargs rm -rf + +EXPOSE 3000 3500 50001 + +WORKDIR /app diff --git a/resources/docker/Dockerfile-cache b/resources/docker/Dockerfile-cache new file mode 100644 index 00000000..322fb693 --- /dev/null +++ b/resources/docker/Dockerfile-cache @@ -0,0 +1,18 @@ +# syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 + +COPY src /app/src +COPY workflows /app/workflows +COPY ops /app/ops + +RUN /opt/conda/bin/pip install /app/src/vibe_core && \ + /opt/conda/bin/pip install /app/src/vibe_common && \ + /opt/conda/bin/pip install /app/src/vibe_agent + +RUN rm -rf /app/src + +RUN find /app -type d -name __pycache__ | xargs rm -rf + +EXPOSE 3000 3500 50001 + +WORKDIR /app diff --git a/resources/docker/Dockerfile-dev b/resources/docker/Dockerfile-dev new file mode 100644 index 00000000..58316bb8 --- /dev/null +++ b/resources/docker/Dockerfile-dev @@ -0,0 +1,17 @@ +# syntax=docker/dockerfile:1 +ARG BASE_IMAGE +FROM $BASE_IMAGE +COPY resources/envs/dev.yaml /tmp/dev.yaml + +RUN tdnf update -y || echo "Not updating anything..." 
&& \ + tdnf install -y sudo azure-cli + +RUN micromamba install -f /tmp/dev.yaml + +RUN az extension add --system --name azure-devops + +RUN mkdir -p /opt/terravibes/ops/resources + +RUN chmod 777 /opt/terravibes/ops/resources + +EXPOSE 3000 3500 50001 diff --git a/resources/docker/Dockerfile-devcontainer b/resources/docker/Dockerfile-devcontainer new file mode 100644 index 00000000..464b9e9e --- /dev/null +++ b/resources/docker/Dockerfile-devcontainer @@ -0,0 +1,40 @@ +FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 + +ARG USERNAME=vscode +ARG USER_UID=1000 +ARG USER_GID=$USER_UID +ARG DOCKER_GID=998 +ENV DOCKER_BUILDKIT=1 +ENV PATH="/opt/conda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" + +USER root + +RUN tdnf upgrade -y && \ + tdnf install -y moby-cli moby-compose moby-engine moby-buildx \ + ca-certificates unzip iptables which dos2unix git-lfs \ + icu icu-devel build-essential pkg-config zsh sudo azure-cli && \ + rm -rf /var/cache/tdnf/* + +RUN groupadd --gid $USER_GID $USERNAME && \ + useradd --uid $USER_UID --gid $USER_GID -m $USERNAME -s /usr/bin/zsh && \ + touch /etc/sudoers.d/$USERNAME && \ + zsh -c "echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME" && \ + cat /etc/sudoers.d/$USERNAME && \ + chmod 0440 /etc/sudoers.d/$USERNAME && \ + mkdir /home/$USERNAME/.vscode-server && \ + chown -R $USERNAME:$USERNAME /home/$USERNAME + +RUN groupadd --gid $DOCKER_GID docker || echo ; \ + usermod -aG $(grep docker /etc/group | cut -d : -f 1) $USERNAME + +COPY ./docker-in-docker-install.sh /tmp/ +RUN dos2unix /tmp/docker-in-docker-install.sh && \ + /bin/bash /tmp/docker-in-docker-install.sh + +USER $USERNAME +WORKDIR /home/$USERNAME + +ENTRYPOINT ["/usr/local/share/docker-init.sh"] +VOLUME [ "/var/lib/docker" ] +CMD ["sleep", "infinity"] diff --git a/resources/docker/Dockerfile-services-base b/resources/docker/Dockerfile-services-base new file mode 100644 index 00000000..d17675d7 --- /dev/null +++ 
b/resources/docker/Dockerfile-services-base @@ -0,0 +1,34 @@ +# syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder + + +RUN tdnf update -y || echo "Not updating anything..." &&\ + tdnf install -y tar ca-certificates && tdnf clean all + +WORKDIR /usr/local + +RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ + ln -s /usr/local/bin/micromamba /usr/local/bin/conda && \ + chmod +x /usr/local/bin/micromamba + +COPY resources/envs/rest-api_orchestrator.yml /tmp/rest-api.yml +COPY resources/envs/services-requirements.txt /tmp/services-requirements.txt + +RUN conda env create -p /opt/conda -f /tmp/rest-api.yml && \ + conda clean --all --yes + +ENV CONDA_PREFIX=/opt/conda +RUN /opt/conda/bin/pip install uv && \ + /opt/conda/bin/uv pip install -r /tmp/services-requirements.txt && \ + /opt/conda/bin/uv cache clean && \ + rm -r /root/.cache/pip && \ + rm -rf /root/.mamba && \ + rm -rf /opt/*conda/pkgs && \ + rm -rf /app/docker && \ + find / -type d -name __pycache__ | xargs rm -rf && \ + rm /tmp/rest-api.yml && \ + rm /tmp/services-requirements.txt && \ + rm -fr /var/cache/tdnf && \ + rm -fr /var/cache/conda + +ENV PATH "/opt/conda/bin:${PATH}" diff --git a/resources/docker/Dockerfile-worker b/resources/docker/Dockerfile-worker new file mode 100644 index 00000000..b9177152 --- /dev/null +++ b/resources/docker/Dockerfile-worker @@ -0,0 +1,23 @@ +# syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 + +COPY src /app/src +COPY workflows /app/workflows +COPY ops /app/ops + +COPY op_resources/ /opt/terravibes/ops/resources/ + +RUN /opt/conda/bin/pip install /app/src/vibe_core && \ + /opt/conda/bin/pip install /app/src/vibe_common && \ + /opt/conda/bin/pip install /app/src/vibe_agent && \ + /opt/conda/bin/pip install /app/src/vibe_lib + +RUN rm -rf /app/src + +EXPOSE 3000 3500 50001 + +ENV PYTHONPATH "/app" + +WORKDIR /app + +SHELL ["conda", "run", 
"--no-capture-output", "-p", "/opt/conda", "/bin/bash", "-c"] diff --git a/resources/docker/Dockerfile-worker-base b/resources/docker/Dockerfile-worker-base new file mode 100644 index 00000000..b235e501 --- /dev/null +++ b/resources/docker/Dockerfile-worker-base @@ -0,0 +1,59 @@ +# syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder + +RUN tdnf update -y || echo "Not updating anything..." && tdnf install -y \ + libibverbs \ + librdmacm \ + iproute \ + build-essential \ + git \ + wget \ + tar \ + python3-pip \ + freefont \ + procps \ + ca-certificates \ + util-linux \ + dejavu-sans-fonts \ + fontconfig && tdnf clean all # fontconfig used to be for SNAP, but it doesnt hurt leaving it + +FROM builder AS builder1 + +# The line below was for SNAP, but it doesn't hurt to have it +ENV JAVA_OPTS=-Djava.awt.headless=true +ENV CONDA_PREFIX=/opt/conda + +WORKDIR /usr/local + +RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ + ln -s /usr/local/bin/micromamba /usr/local/bin/conda && \ + chmod +x /usr/local/bin/micromamba + +RUN conda shell init -s bash + +COPY resources/envs/worker.yml /tmp +COPY resources/envs/worker-requirements.txt /tmp + +RUN conda env create -p /opt/conda -f /tmp/worker.yml && \ + conda clean --all --yes + +RUN /opt/conda/bin/pip install uv && \ + /opt/conda/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu && \ + /opt/conda/bin/pip install torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu && \ + /opt/conda/bin/uv pip install -r /tmp/worker-requirements.txt && \ + /opt/conda/bin/uv cache clean && \ + rm -r /root/.cache/pip && \ + rm -rf /root/.mamba && \ + rm -rf /opt/*conda/pkgs && \ + rm -rf /app/docker && \ + find / -type d -name __pycache__ | xargs rm -rf && \ + rm /tmp/worker.yml && \ + rm /tmp/worker-requirements.txt && \ + rm -fr /var/cache/tdnf && \ + rm -fr /var/cache/conda + +ENV PATH "/opt/conda/bin:${PATH}" + +# 
compilation is necessary the first time aquacrop is imported and this needs writing permission +RUN /opt/conda/bin/pip install aquacrop==2.2.3 --no-deps && \ + python -c "import aquacrop" diff --git a/resources/docker/docker-in-docker-install.sh b/resources/docker/docker-in-docker-install.sh new file mode 100755 index 00000000..7489faba --- /dev/null +++ b/resources/docker/docker-in-docker-install.sh @@ -0,0 +1,93 @@ +#!/bin/sh +# Copyright (c) Microsoft Corporation. All rights reserved. +# +# This is based on https://github.com/devcontainers/features/blob/main/src/docker-in-docker/install.sh +# We had to copy this because, at the time of writing, devcontainers didn't have tooling to run +# features on top of CBL-Mariner2/Azure Linux. + +if [ -f "/usr/local/share/docker-init.sh" ]; then + echo "/usr/local/share/docker-init.sh already exists, exiting." + exit 0 +fi + +tee /usr/local/share/docker-init.sh > /dev/null \ +<< 'EOF' +dockerd_start="AZURE_DNS_AUTO_DETECTION=${AZURE_DNS_AUTO_DETECTION} DOCKER_DEFAULT_ADDRESS_POOL=${DOCKER_DEFAULT_ADDRESS_POOL} $(cat << 'INNEREOF' + # explicitly remove dockerd and containerd PID file to ensure that it can start properly if it was stopped uncleanly + # ie: docker kill + find /run /var/run -iname 'docker*.pid' -delete || : + find /run /var/run -iname 'container*.pid' -delete || : + + ## Dind wrapper script from docker team, adapted to a function + # Maintained: https://github.com/moby/moby/blob/master/hack/dind + + export container=docker + + if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security; then + mount -t securityfs none /sys/kernel/security || { + echo >&2 'Could not mount /sys/kernel/security.' + echo >&2 'AppArmor detection and --privileged mode might break.' + } + fi + + # Mount /tmp (conditionally) + if ! 
mountpoint -q /tmp; then + mount -t tmpfs none /tmp + fi + + # cgroup v2: enable nesting + if [ -f /sys/fs/cgroup/cgroup.controllers ]; then + # move the processes from the root group to the /init group, + # otherwise writing subtree_control fails with EBUSY. + # An error during moving non-existent process (i.e., "cat") is ignored. + mkdir -p /sys/fs/cgroup/init + xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : + # enable controllers + sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers \ + > /sys/fs/cgroup/cgroup.subtree_control + fi + ## Dind wrapper over. + + # Handle DNS + set +e + cat /etc/resolv.conf | grep -i 'internal.cloudapp.net' + if [ $? -eq 0 ] && [ "${AZURE_DNS_AUTO_DETECTION}" = "true" ] + then + echo "Setting dockerd Azure DNS." + CUSTOMDNS="--dns 168.63.129.16" + else + echo "Not setting dockerd DNS manually." + CUSTOMDNS="" + fi + + set -e + + if [ -z "$DOCKER_DEFAULT_ADDRESS_POOL" ] + then + DEFAULT_ADDRESS_POOL="" + else + DEFAULT_ADDRESS_POOL="--default-address-pool $DOCKER_DEFAULT_ADDRESS_POOL" + fi + + # Start docker/moby engine + ( dockerd $CUSTOMDNS $DEFAULT_ADDRESS_POOL > /tmp/dockerd.log 2>&1 ) & +INNEREOF +)" + +# Start using sudo if not invoked as root +if [ "$(id -u)" -ne 0 ]; then + sudo /bin/sh -c "${dockerd_start}" +else + eval "${dockerd_start}" +fi + +set +e + +# Execute whatever commands were passed in (if any). This allows us +# to set this script to ENTRYPOINT while still executing the default CMD. 
+exec "$@" +EOF + +chmod +x /usr/local/share/docker-init.sh + +echo 'docker-in-docker script completed' diff --git a/resources/documentation_generation/generate_datatype_hierarchy_diagram.py b/resources/documentation_generation/generate_datatype_hierarchy_diagram.py new file mode 100644 index 00000000..5f670e39 --- /dev/null +++ b/resources/documentation_generation/generate_datatype_hierarchy_diagram.py @@ -0,0 +1,81 @@ +import os +import subprocess +from typing import List + +from jinja2 import Template + +HERE = os.path.dirname(os.path.abspath(__file__)) +PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) +DOC_DIR = os.path.abspath( + os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown", "data_types_diagram") +) +DATA_TYPES_PATH = os.path.abspath( + os.path.join(PROJECT_DIR, "src", "vibe_core", "vibe_core", "data") +) +TEMPLATE_PATH = os.path.abspath(os.path.join(HERE, "templates", "datatype_hierarchy_template.md")) + + +def render_template( + mermaid_diagram: str, + output_path: str, + template_path: str, +): + """Load and render template given a data source""" + + with open(template_path) as f: + t = Template(f.read()) + + rendered_template = t.render(mermaid_diagram=mermaid_diagram) + + if not os.path.exists(os.path.dirname(output_path)): + os.makedirs(os.path.dirname(output_path)) + + with open(output_path, "w") as f: + f.write(rendered_template) + + +def list_modules(module_path: str) -> List[str]: + """List all modules in module_path""" + + paths = [] + for root, dirs, files in os.walk(module_path): + for file in files: + if file.endswith(".py") and not file.startswith("__"): + paths.append(os.path.join(root, file)) + + return paths + + +def build_data_type_diagrams(data_module_paths: List[str]): + for path in data_module_paths: + module_name = path.split("/")[-1].split(".")[0] + subprocess.run( + [ + "pyreverse", + "-my", + "-A", + "-k", + "-o", + "mmd", + "-p", + f"{module_name}", + path, + ], + check=True, + ) + + with 
open(f"classes_{module_name}.mmd") as f: + mmd = f.read() + render_template(mmd, os.path.join(DOC_DIR, f"{module_name}_hierarchy.md"), TEMPLATE_PATH) + + # Delete the generated mmd file with subprocess.run + subprocess.run(["rm", f"classes_{module_name}.mmd"], check=True) + + +def main(): + data_module_paths = list_modules(DATA_TYPES_PATH) + build_data_type_diagrams(data_module_paths) + + +if __name__ == "__main__": + main() diff --git a/resources/documentation_generation/generate_notebook_list.py b/resources/documentation_generation/generate_notebook_list.py new file mode 100644 index 00000000..1c544309 --- /dev/null +++ b/resources/documentation_generation/generate_notebook_list.py @@ -0,0 +1,160 @@ +import json +import os +from dataclasses import dataclass +from math import inf +from typing import Dict, List, Optional, Tuple + +from jinja2 import Template + +HERE = os.path.dirname(os.path.abspath(__file__)) +PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) +NOTEBOOK_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "notebooks")) +LIST_TEMPLATE_PATH = os.path.abspath(os.path.join(HERE, "templates", "list_notebook_template.md")) +DOC_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown")) +OUTPUT_PATH = os.path.abspath(os.path.join(DOC_DIR, "NOTEBOOK_LIST.md")) +GITHUB_URL = "https://github.com/microsoft/farmvibes-ai/blob/main" +PRIVATE_TAG = "private" + + +@dataclass +class NotebookDataSource: + name: str + description: str + tags: List[Tuple[int, str]] + repo_path: str + disk_time_req: str + + +def render_template( + data_source: List[NotebookDataSource], + tag_data_source: List[Tuple[Tuple[int, str], List[NotebookDataSource]]], + output_path: str, +): + """Load and render template given a data source""" + + with open(LIST_TEMPLATE_PATH) as f: + t = Template(f.read()) + + rendered_template = t.render( + data_source=data_source, + tag_data_source=tag_data_source, + ) + + with open(output_path, "w") as f: + 
f.write(rendered_template) + + +def format_disk_time_req(disk_space: str, running_time: str) -> str: + """Format the disk space and running time requirements""" + output_str = "({}{}{})" if disk_space or running_time else "{}{}{}" + sep = ", " if disk_space and running_time else "" + return output_str.format(disk_space, sep, running_time) + + +def parse_nb_metadata(nb_path: str) -> Optional[NotebookDataSource]: + """Parse the ipynb to extract its metadata""" + with open(nb_path) as f: + nb_json = json.load(f) + + try: + nb_metadata = nb_json["metadata"] + except KeyError: + raise KeyError(f"Notebook {nb_path} has no metadata") + + # Parse tag order + nb_tags = [] + try: + tags = nb_metadata["tags"] + except KeyError: + raise KeyError(f"Notebook {nb_path} with metadata {nb_metadata} has no tags") + + for tag in tags: + tag_components = tag.split("_") + if len(tag_components) == 2: + tag_order = int(tag_components[0]) + tag_name = tag_components[-1] + else: + tag_order = inf + tag_name = tag_components[-1] + + if tag_name == PRIVATE_TAG: + return None + nb_tags.append((tag_order, tag_name)) + + nb_name = nb_metadata["name"] + nb_description = nb_metadata["description"] + nb_repo_path = f"{GITHUB_URL}{nb_path.split(PROJECT_DIR)[-1]}" + nb_disk_time_req = format_disk_time_req(nb_metadata["disk_space"], nb_metadata["running_time"]) + + return NotebookDataSource( + name=nb_name, + description=nb_description, + tags=nb_tags, + repo_path=nb_repo_path, + disk_time_req=nb_disk_time_req, + ) + + +def list_notebooks() -> List[str]: + """Iterate over NOTEBOOK_DIR and retrieve all ipynb paths""" + notebook_list: List[str] = [] + + for folder, _, nb_files in os.walk(NOTEBOOK_DIR): + for nb_file in nb_files: + if nb_file.endswith(".ipynb"): + nb_path = os.path.abspath(os.path.join(folder, nb_file)) + notebook_list.append(nb_path) + + return notebook_list + + +def sort_tags( + tag_data_source: Dict[Tuple[int, str], List[NotebookDataSource]] +) -> List[Tuple[Tuple[int, str], 
List[NotebookDataSource]]]: + """Sort tags by tag order and then by name""" + sorted_tags_ds = [] + for tag_tuple, nb_data_source_list in tag_data_source.items(): + sorted_nb_data_source_list = sorted(nb_data_source_list, key=lambda x: x.name) + sorted_tags_ds.append((tag_tuple, sorted_nb_data_source_list)) + sorted_tags_ds = sorted(sorted_tags_ds, key=lambda x: x[0]) + return sorted_tags_ds + + +def build_notebook_list(): + """Build the notebook list page""" + data_source: List[NotebookDataSource] = [] + tag_data_source: Dict[Tuple[str, int], List[NotebookDataSource]] = {} + + # List notebooks in NOTEBOOK_DIR + notebook_list = list_notebooks() + + # For each notebook, parse the json metadata and get attributes + for notebook_path in notebook_list: + notebook_data_source = parse_nb_metadata(notebook_path) + + if notebook_data_source: + # Add notebook to data source + data_source.append(notebook_data_source) + + # Add notebook to tag list + for tag_tuple in notebook_data_source.tags: + if tag_tuple not in tag_data_source: + tag_data_source[tag_tuple] = [] + tag_data_source[tag_tuple].append(notebook_data_source) + + # Sort data source by name + data_source = sorted(data_source, key=lambda x: x.name) + + # Sort tag data source by tag order and name + sorted_tags_ds = sort_tags(tag_data_source) + + # Render template + render_template(data_source, sorted_tags_ds, OUTPUT_PATH) + + +def main(): + build_notebook_list() + + +if __name__ == "__main__": + main() diff --git a/resources/documentation_generation/generate_workflow_list.py b/resources/documentation_generation/generate_workflow_list.py new file mode 100644 index 00000000..0f5509cf --- /dev/null +++ b/resources/documentation_generation/generate_workflow_list.py @@ -0,0 +1,158 @@ +import os +from dataclasses import dataclass +from typing import Dict, List, Union + +import yaml +from jinja2 import Template + +from vibe_core.client import FarmvibesAiClient +from vibe_core.datamodel import TaskDescription +from 
vibe_server.workflow.spec_parser import WorkflowParser + +HERE = os.path.dirname(os.path.abspath(__file__)) +PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) +DOC_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "docs", "source", "docfiles", "markdown")) +WORKFLOW_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "workflows")) + +WF_LIST_TEMPLATE_PATH = os.path.abspath( + os.path.join(HERE, "templates", "list_workflow_template.md") +) +WF_LIST_OUTPUT_PATH = os.path.abspath(os.path.join(DOC_DIR, "WORKFLOW_LIST.md")) + +WF_YAML_TEMPLATE_PATH = os.path.abspath( + os.path.join(HERE, "templates", "workflow_yaml_template.md") +) +WF_YAML_OUTPUT_DIR = os.path.abspath(os.path.join(DOC_DIR, "workflow_yaml")) + +WF_CATEGORY_LIST = ["data_ingestion", "data_processing", "farm_ai", "forest_ai", "ml"] + + +@dataclass +class WorkflowInformation: + name: str + description: Union[str, TaskDescription] + markdown_link: str + yaml: str + mermaid_diagram: str + + +@dataclass +class TemplateDataSource: + category: str + wf_list: List[WorkflowInformation] + + +def format_wf_name(full_wf_name: str, category: str): + return full_wf_name.split(f"{category}/")[-1] + + +def render_template( + data_source: Union[List[TemplateDataSource], WorkflowInformation], + output_path: str, + template_path: str, +): + """Load and render template given a data source""" + + with open(template_path) as f: + t = Template(f.read()) + + rendered_template = t.render(data_source=data_source) + + if not os.path.exists(os.path.dirname(output_path)): + os.makedirs(os.path.dirname(output_path)) + + with open(output_path, "w") as f: + f.write(rendered_template) + + +def list_exposed_workflows() -> Dict[str, List[str]]: + """Call the REST API to list the workflows""" + + workflow_list = FarmvibesAiClient("http://localhost:1108/").list_workflows() + + category_dict = { + cat: [wf_name for wf_name in workflow_list if wf_name.startswith(cat)] + for cat in WF_CATEGORY_LIST + } + return category_dict + + +def 
parse_wf_yamls(category: str, wf_list: List[str]) -> List[WorkflowInformation]: + """Parse the wf yaml files to extract short description""" + parsedList = [] + + client = FarmvibesAiClient("http://localhost:1108/") + + for wf_name in wf_list: + wf_yaml = client.get_workflow_yaml(wf_name) + yaml_dict = yaml.safe_load(wf_yaml) + wf_spec = WorkflowParser.parse_dict(yaml_dict) + + wf_md_link = os.path.relpath( + path=os.path.join(WF_YAML_OUTPUT_DIR, f"{wf_name}.md"), start=DOC_DIR + ) + + wf_name = format_wf_name(wf_name, category) + + parsedList.append( + WorkflowInformation( + name=wf_name, + description=wf_spec.description.short_description, + markdown_link=wf_md_link, + yaml=wf_yaml, + mermaid_diagram="", + ) + ) + + return sorted(parsedList, key=lambda x: x.name) + + +def build_workflow_list(): + """Build the worflow list page from the client""" + data_source: List[TemplateDataSource] = [] + + # List workflows in the REST API + wf_per_category = list_exposed_workflows() + + # For each workflow, parse the yaml and get description + for category, wf_list in wf_per_category.items(): + data_source.append( + TemplateDataSource(category=category, wf_list=parse_wf_yamls(category, wf_list)) + ) + + render_template(data_source, WF_LIST_OUTPUT_PATH, WF_LIST_TEMPLATE_PATH) + + +def build_workflow_yamls(): + """Build the workflow yaml pages from the client""" + client = FarmvibesAiClient("http://localhost:1108/") + + for wf_name in client.list_workflows(): + wf_yaml = client.get_workflow_yaml(wf_name) + yaml_dict = yaml.safe_load(wf_yaml) + wf_spec = WorkflowParser.parse_dict(yaml_dict) + + description = client.describe_workflow(wf_name)["description"] + + wf_yaml_output_path = os.path.join(WF_YAML_OUTPUT_DIR, f"{wf_name}.md") + if not os.path.exists(os.path.dirname(wf_yaml_output_path)): + os.makedirs(os.path.dirname(wf_yaml_output_path)) + + data_source = WorkflowInformation( + name=wf_name, + description=description, + markdown_link="", + yaml=wf_yaml, + 
mermaid_diagram=wf_spec.to_mermaid(), + ) + + render_template(data_source, wf_yaml_output_path, WF_YAML_TEMPLATE_PATH) + + +def main(): + build_workflow_list() + build_workflow_yamls() + + +if __name__ == "__main__": + main() diff --git a/resources/documentation_generation/templates/datatype_hierarchy_template.md b/resources/documentation_generation/templates/datatype_hierarchy_template.md new file mode 100644 index 00000000..1804638b --- /dev/null +++ b/resources/documentation_generation/templates/datatype_hierarchy_template.md @@ -0,0 +1,6 @@ + +
+ +{{mermaid_diagram}} + +
diff --git a/resources/documentation_generation/templates/list_notebook_template.md b/resources/documentation_generation/templates/list_notebook_template.md new file mode 100644 index 00000000..4b5965a7 --- /dev/null +++ b/resources/documentation_generation/templates/list_notebook_template.md @@ -0,0 +1,37 @@ +# Notebooks + +We present a complete list of the notebooks available in FarmVibes.AI with a short summary for each of them. Besides their description, we also include the expected disk space and running time required per notebook, considering the recommended VM size. + +
+ +--------------- + + +## Summary + +We organize available notebooks in the following topics: + +{% for tag_tuple, nb_list in tag_data_source -%} + +
+ {{tag_tuple[1]}} + +{% for nb in nb_list %}- [`{{nb.name}}` 📓]({{nb.repo_path}}) + +{% endfor %} +
+{% endfor %} + + + +
+ +--------------- + + +## Notebooks description + +{% for nb in data_source %}- [`{{nb.name}}` 📓]({{nb.repo_path}}) {%if nb.disk_time_req %} {{nb.disk_time_req}} {% endif %}: {{nb.description}} + +{% endfor %} + diff --git a/resources/documentation_generation/templates/list_workflow_template.md b/resources/documentation_generation/templates/list_workflow_template.md new file mode 100644 index 00000000..e3270602 --- /dev/null +++ b/resources/documentation_generation/templates/list_workflow_template.md @@ -0,0 +1,23 @@ +# Workflow List + +We group FarmVibes.AI workflows in the following categories: + +- **Data Ingestion**: workflows that download and preprocess data from a particular source, preparing data to be the starting point for most of the other workflows in the platform. +This includes raw data sources (e.g., Sentinel 1 and 2, Landsat, CropDataLayer) as well as the SpaceEye cloud-removal model; +- **Data Processing**: workflows that transform data into different data types (e.g., computing NDVI/MSAVI/Methane indexes, aggregating mean/max/min statistics of rasters, timeseries aggregation); +- **FarmAI**: composed workflows (data ingestion + processing) whose outputs enable FarmAI scenarios (e.g., predicting conservation practices, estimating soil carbon sequestration, identifying methane leakage); +- **ForestAI**: composed workflows (data ingestion + processing) whose outputs enable ForestAI scenarios (e.g., detecting forest change, estimating forest extent); +- **ML**: machine learning-related workflows to train, evaluate, and infer models within the FarmVibes.AI platform (e.g., dataset creation, inference). + +Below is a list of all available workflows within the FarmVibes.AI platform. For each of them, we provide a brief description and a link to the corresponding documentation page.
+ +--------- + +{% for elem in data_source -%} + +## {{elem.category}} + +{% for wf in elem.wf_list %}- [`{{wf.name}}` 📄]({{wf.markdown_link}}): {{wf.description}} + +{% endfor %} +{% endfor %} diff --git a/resources/documentation_generation/templates/workflow_yaml_template.md b/resources/documentation_generation/templates/workflow_yaml_template.md new file mode 100644 index 00000000..731da38f --- /dev/null +++ b/resources/documentation_generation/templates/workflow_yaml_template.md @@ -0,0 +1,51 @@ +# {{data_source.name}} + +{{data_source.description.short_description}} {{data_source.description.long_description}} + +```{mermaid} + {{data_source.mermaid_diagram}} +``` + +## Sources + +{% for source_name, source_desc in data_source.description.inputs.items() -%} + +- **{{source_name}}**: {{source_desc}} + +{% endfor -%} + +## Sinks + +{% for sink_name, sink_desc in data_source.description.outputs.items() -%} + +- **{{sink_name}}**: {{sink_desc}} + +{% endfor -%} + +{% if data_source.description.parameters -%} +## Parameters + +{% for param_name, param_desc in data_source.description.parameters.items() -%} + +- **{{param_name}}**: {% if param_desc is string %}{{param_desc}}{% else %}{{param_desc[0]}}{% endif %} + +{% endfor -%} +{% endif -%} + +{% if data_source.description.task_descriptions -%} +## Tasks + +{% for task_name, task_desc in data_source.description.task_descriptions.items() -%} + +- **{{task_name}}**: {{task_desc}} + +{% endfor -%} +{% endif -%} + +## Workflow Yaml + +```yaml + +{{data_source.yaml}} + +``` diff --git a/resources/envs/dev.yaml b/resources/envs/dev.yaml new file mode 100644 index 00000000..2ceafa63 --- /dev/null +++ b/resources/envs/dev.yaml @@ -0,0 +1,10 @@ +channels: + - conda-forge +dependencies: + - ruff + - pytest + - pytest-azurepipelines + - pytest-cov + - nodejs + - Jinja2~=3.1.4 + - cryptography>=42.0.0 diff --git a/resources/envs/rest-api_orchestrator.yml b/resources/envs/rest-api_orchestrator.yml new file mode 100644 index 
00000000..a425e437 --- /dev/null +++ b/resources/envs/rest-api_orchestrator.yml @@ -0,0 +1,8 @@ +name: base +channels: + - conda-forge +dependencies: + - python=3.11.* + - pip + - wheel + - curl diff --git a/resources/envs/services-requirements.txt b/resources/envs/services-requirements.txt new file mode 100644 index 00000000..4852290f --- /dev/null +++ b/resources/envs/services-requirements.txt @@ -0,0 +1,15 @@ +grpcio~=1.53.0 +dapr~=1.13.0 +dapr-ext-grpc~=1.12.0 +fastapi-versioning~=0.10.0 +pystac[validation]~=1.1.0 +strenum~=0.4.7 +requests~=2.32.0 +uvicorn~=0.13.4 +pyyaml~=6.0.1 +debugpy~=1.8.1 +shapely>=1.7.1 +fastapi~=0.97.0 +fastapi_utils~=0.2.1 +pydantic~=1.8.2 +cryptography>=42.0.0 diff --git a/resources/envs/worker-requirements.txt b/resources/envs/worker-requirements.txt new file mode 100644 index 00000000..b0a04891 --- /dev/null +++ b/resources/envs/worker-requirements.txt @@ -0,0 +1,68 @@ +adlfs~=2022.10.0 +ambient-api==1.5.6 +azure-cosmos~=4.2.0 +azure-identity~=1.14.0 +azure-keyvault>=4.1.0 +azure-storage-blob>=12.5.0 +cdsapi==0.5.1 +pooch<1.5.0 +cfgrib~=0.9.10.4 +jsonschema +cryptography>=42.0.0 +dapr~=1.13.0 +dapr-ext-grpc~=1.12.0 +dask[dataframe]==2024.4.1 +debugpy~=1.8.1 +einops==0.4.1 +fastapi~=0.97.0 +fastapi-versioning~=0.10.0 +fastapi_utils~=0.2.1 +fiona~=1.8.0 +folium~=0.12.0 +fonttools~=4.43.0 +fsspec~=2024.3.1 +grpcio~=1.53.0 +h5py~=3.10.0 +herbie-data~=2022.9.0.post1 +importlib-resources==5.2.2 +ipython~=8.10 +Jinja2~=3.1.4 +matplotlib~=3.8.0 +msal~=1.22.0 +netcdf4==1.6.5 +onnxruntime~=1.17.1 +orjson~=3.9.15 +osmnx~=1.2.2 +owslib~=0.30.0 +pandas~=2.2.1 +pebble~=4.6.3 +pillow~=10.2.0 +pint~=0.23 +planetary-computer~=0.4.5 +protlearn==0.0.3 +pydantic~=1.8.2 +pydap==3.2.2 +pyngrok~=7.1.2 +pysmb==1.2.9.1 +pystac~=1.6.0 +pystac-client~=0.3.2 +PyYAML~=6.0.1 +rasterio~=1.2 +retrying~=1.3.3 +rio-cogeo~=3.4.1 +rioxarray~=0.15.1 +s2cloudless~=1.5.0 +scikit-gstat~=1.0.12 +scikit-image~=0.22.0 +scikit-learn~=1.1.0 +shapely>=1.7.1 +spyndex==0.4.0 
+strenum~=0.4.7 +timezonefinder==6.2.0 +tqdm~=4.66.3 +typing-extensions~=4.7.1 +uvicorn~=0.14.0 +xarray~=2022.11.0 +xlrd~=2.0.1 +xmltodict==0.13.0 +zarr~=2.13.3 diff --git a/resources/envs/worker.yml b/resources/envs/worker.yml new file mode 100644 index 00000000..e382dc24 --- /dev/null +++ b/resources/envs/worker.yml @@ -0,0 +1,11 @@ +channels: + - pytorch + - conda-forge +dependencies: + - python=3.11.* + - libkml + - eccodes + - python-eccodes + - numpy + - wheel + - pip diff --git a/scripts/setup_python_develop_env.sh b/scripts/setup_python_develop_env.sh new file mode 100644 index 00000000..9785bbfd --- /dev/null +++ b/scripts/setup_python_develop_env.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +SCRIPTFILE=$(readlink -f "$0") +SCRIPTPATH=$(dirname "$SCRIPTFILE") +ROOTDIR=$(realpath "$SCRIPTPATH/..") +DEV_ENV_FILE=$ROOTDIR/resources/envs/dev.yaml + +conda env update -f "$DEV_ENV_FILE" + +# Installing internal packages (paths are quoted so checkouts under directories with spaces work) +terravibes_packages="vibe_core vibe_common vibe_agent vibe_server vibe_lib vibe_dev" +for package in $terravibes_packages; do + echo Installing package $package + pip install -e "$ROOTDIR/src/$package" +done \ No newline at end of file diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/tests/benchmark/test_spaceeye_ops.py b/src/tests/benchmark/test_spaceeye_ops.py new file mode 100644 index 00000000..d4cb92db --- /dev/null +++ b/src/tests/benchmark/test_spaceeye_ops.py @@ -0,0 +1,71 @@ +import os +import time +from typing import List, cast + +import pytest + +from vibe_core.data import BaseVibeDict, DataVibe +from vibe_core.testing.comparison import assert_all_close +from vibe_dev.testing.op_tester import OpTester, ReferenceRetriever + +HERE = os.path.dirname(os.path.abspath(__file__)) +OPS_DIR = os.path.join(HERE, "..", "..", "..", "ops") +FILES_DIR = "/tmp/op_references/" +TEST_OPS = [ + "compute_cloud_prob", + "compute_sentinel_shadow", + "download_sentinel_1", +
"download_sentinel_2_from_gcp", + "download_sentinel_2_from_pc", + "filter_items", + "list_sentinel_1_products", + "list_sentinel_2_L1C", + "list_sentinel_2_L2A", + "merge_cloud_masks", + "merge_sentinel1_orbits", + "merge_sentinel_orbits", + "preprocess_sentinel1", + "preprocess_sentinel2", +] +OP_YAML_DIR = { + "list_sentinel_2_L1C": "list_sentinel_2_products", + "list_sentinel_2_L2A": "list_sentinel_2_products", +} + + +@pytest.fixture +def reference_retriever(): + return ReferenceRetriever(FILES_DIR) + + +@pytest.fixture +def op_tester(request: pytest.FixtureRequest): + op_name: str = request.param # type: ignore + op_dir = OP_YAML_DIR.get(op_name, op_name) + op_config_path = os.path.join(OPS_DIR, op_dir, f"{op_name}.yaml") + return OpTester(op_config_path) + + +@pytest.fixture +def test_data(request: pytest.FixtureRequest, reference_retriever: ReferenceRetriever): + op_name = request.param # type: ignore + return reference_retriever.retrieve(op_name) + + +@pytest.mark.parametrize("op_tester,test_data", [(t, t) for t in TEST_OPS], indirect=True) +def test_op_outputs(op_tester: OpTester, test_data: List[List[BaseVibeDict]]): + for input_data, expected_output in test_data: + start = time.time() + op_output = op_tester.run(**input_data) + end = time.time() + for name, out in op_output.items(): + expected = expected_output[name] + if isinstance(expected, list): + sort_expected = sorted(expected, key=lambda x: x.time_range[0]) + sort_out = sorted(cast(List[DataVibe], out), key=lambda x: x.time_range[0]) + for o1, o2 in zip(sort_expected, sort_out): + assert_all_close(o1, o2) + else: + assert isinstance(out, DataVibe) + assert_all_close(expected, out) + print(f"Spent {end - start}s on op: {op_tester.op.name}") diff --git a/src/tests/conftest.py b/src/tests/conftest.py new file mode 100644 index 00000000..927a27c6 --- /dev/null +++ b/src/tests/conftest.py @@ -0,0 +1,13 @@ +import pytest + +from vibe_dev.testing import anyio_backend # type: ignore # noqa +from 
vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir # noqa +from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 +from vibe_dev.testing.storage_fixtures import TEST_STORAGE # noqa: F401 +from vibe_dev.testing.utils import WorkflowTestHelper +from vibe_dev.testing.workflow_fixtures import SimpleStrData, workflow_run_config # noqa + + +@pytest.fixture(scope="session") +def workflow_test_helper(): + return WorkflowTestHelper() diff --git a/src/tests/test_notebooks.py b/src/tests/test_notebooks.py new file mode 100644 index 00000000..c2d254fb --- /dev/null +++ b/src/tests/test_notebooks.py @@ -0,0 +1,39 @@ +import json +import os +from typing import List + +import pytest + +HERE = os.path.dirname(os.path.abspath(__file__)) +PROJECT_DIR = os.path.abspath(os.path.join(HERE, "..", "..")) +NOTEBOOK_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "notebooks")) +WIKI_URL = ( + "https://dev.azure.com/ResearchForIndustries/EYWA/_wiki/wikis/EYWA.wiki/214/Notebook-Metadata" +) + + +def list_notebooks() -> List[str]: + notebook_list: List[str] = [] + + for folder, _, nb_files in os.walk(NOTEBOOK_DIR): + for nb_file in nb_files: + if nb_file.endswith(".ipynb"): + nb_path = os.path.abspath(os.path.join(folder, nb_file)) + notebook_list.append(nb_path) + + return notebook_list + + +@pytest.mark.parametrize("notebook_path", list_notebooks()) +def test_workflows_description(notebook_path: str): + """Test that all notebooks have every required metadata field (see WIKI_URL)""" + with open(notebook_path) as f: + nb_json = json.load(f) + + nb_metadata = nb_json["metadata"] + assert "name" in nb_metadata, f"Missing 'name' metadata, refer to {WIKI_URL}" + assert "description" in nb_metadata, f"Missing 'description' metadata, refer to {WIKI_URL}" + assert "disk_space" in nb_metadata, f"Missing disk space requirements, refer to {WIKI_URL}" + assert "running_time" in nb_metadata, f"Missing expected running time, refer to {WIKI_URL}" + assert
"tags" in nb_metadata, f"Missing tags, refer to {WIKI_URL}" + assert len(nb_metadata["tags"]) > 0, f"Tag list is empty, refer to {WIKI_URL}" diff --git a/src/tests/test_op_workflows_integration.py b/src/tests/test_op_workflows_integration.py new file mode 100644 index 00000000..f00d58a1 --- /dev/null +++ b/src/tests/test_op_workflows_integration.py @@ -0,0 +1,312 @@ +import os +import shutil +import tempfile +from dataclasses import asdict +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import pytest +import yaml + +from vibe_agent.ops import OperationFactoryConfig +from vibe_agent.storage import LocalFileAssetManagerConfig, LocalStorageConfig +from vibe_common.secret_provider import AzureSecretProviderConfig +from vibe_core.data.core_types import BaseVibe, DataVibe, OpIOType +from vibe_core.data.utils import StacConverter, get_base_type, serialize_stac +from vibe_dev.local_runner import LocalWorkflowRunner +from vibe_dev.testing.fake_workflows_fixtures import FakeType, get_fake_workflow_path +from vibe_server.workflow import list_workflows +from vibe_server.workflow.description_validator import WorkflowDescriptionValidator +from vibe_server.workflow.runner import ( + NoOpStateChange, + WorkflowCallback, + WorkflowChange, + WorkflowRunner, +) +from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler +from vibe_server.workflow.spec_parser import WorkflowParser, get_workflow_dir +from vibe_server.workflow.spec_parser import parse_edge_string as pes +from vibe_server.workflow.workflow import Workflow, load_workflow_by_name + +HERE = os.path.dirname(os.path.abspath(__file__)) + + +def serialize(base: BaseVibe): + return serialize_stac(StacConverter().to_stac_item(base)) # type: ignore + + +def gen_local_runner( + storage_spec: Any, + workflow_path: str, + fake_ops_path: str, + workflows_path: str, + callback: WorkflowCallback = NoOpStateChange, +) -> WorkflowRunner: + factory_spec = 
OperationFactoryConfig(storage_spec, AzureSecretProviderConfig()) + workflow = Workflow.build(workflow_path, fake_ops_path, workflows_path) + io_mapper = WorkflowIOHandler(workflow) + return LocalWorkflowRunner.build( + workflow, + factory_spec=factory_spec, + io_mapper=io_mapper, + update_state_callback=callback, + max_tries=5, + ) + + +def build_workflow_runner( + tmp_path: Path, + workflow_path: str, + fake_ops_path: str, + workflows_path: str, + callback: WorkflowCallback = NoOpStateChange, +) -> WorkflowRunner: + tmp_asset_path = os.path.join(str(tmp_path), "assets") + storage_spec = LocalStorageConfig( + local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) + ) + return gen_local_runner( + storage_spec, workflow_path, fake_ops_path, workflows_path, callback=callback + ) + + +@pytest.mark.parametrize("workflow_name", list_workflows()) +def test_workflows_load(workflow_name: str): + workflow = load_workflow_by_name(workflow_name) + assert not workflow.has_cycle() + + +@pytest.mark.parametrize( + "workflow_name", [wf_name for wf_name in list_workflows() if not wf_name.startswith("private/")] +) +def test_workflows_description(workflow_name: str): + workflow_dir = get_workflow_dir() + workflow_path = os.path.join(workflow_dir, f"{workflow_name}.yaml") + workflow_spec = WorkflowParser.parse(workflow_path) + WorkflowDescriptionValidator.validate(workflow_spec) + + +@pytest.mark.parametrize("workflow_name", list_workflows()) +def test_list_workflows_schema_generation(workflow_name: str): + workflow = load_workflow_by_name(workflow_name) + ret: Dict[str, Any] = { + k: get_base_type(v).schema() + for k, v in workflow.inputs_spec.items() # type: ignore + } + assert ret + + +def strip_edges_and_nodes_from_workflow( + tmp_path: Path, + workflow_path: str, + fake_ops_path: str, + workflows_path: str, + strip_sinks: bool = False, + tasks_to_keep: int = 1, + del_edges: bool = False, +) -> WorkflowRunner: + base = 
WorkflowParser.parse(workflow_path, fake_ops_path, workflows_path) + + if len(base.tasks) > tasks_to_keep: + must_exist = [t for i, t in enumerate(base.tasks.keys()) if i < tasks_to_keep] + base.tasks = {m: base.tasks[m] for m in must_exist} + base.sinks = {e.origin: e.origin for e in base.edges if pes(e.origin)[0] in must_exist} + base.edges = [] + base.sources = {k: v for i, (k, v) in enumerate(base.sources.items()) if i < 1} + + if strip_sinks: + base.sinks = {} + + if del_edges: + base.edges = [] # type: ignore + + tasks = {k: v.to_dict() for k, v in base.tasks.items()} + base = asdict(base) + base["tasks"] = tasks + + tmp = tempfile.NamedTemporaryFile("w", delete=False) + yaml.dump(base, tmp) # type: ignore + tmp.close() + + try: + return build_workflow_runner(tmp_path, tmp.name, fake_ops_path, workflows_path) + finally: + os.unlink(tmp.name) + + +def test_no_sinks_workflow( + tmp_path: Path, + fake_ops_dir: str, + fake_workflows_dir: str, +): + with pytest.raises(ValueError): + strip_edges_and_nodes_from_workflow( + tmp_path, + get_fake_workflow_path("nested_workflow"), + fake_ops_dir, + fake_workflows_dir, + True, + ) + + +def test_degenerate_workflow(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): + with pytest.raises(ValueError): + # For the reader that might be asking what is going on here, + # we will end up with a two-node workflow that only has a + # single source. The idea of supporting "single" operation + # workflows is that all operations are sources and sinks. + # So, if that's not the case, then edges are required. 
+ strip_edges_and_nodes_from_workflow( + tmp_path, + get_fake_workflow_path("nested_workflow"), + fake_ops_dir, + fake_workflows_dir, + tasks_to_keep=2, + del_edges=True, + ) + + +@pytest.mark.anyio +async def test_arbitrary_input( + tmp_path: Path, + fake_ops_dir: str, + fake_workflows_dir: str, +): + runner = build_workflow_runner( + tmp_path, get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir + ) + user_input = FakeType("fake workflow execution") + out = await runner.run({k: serialize(user_input) for k in runner.workflow.inputs_spec}) + for outname in runner.workflow.output_spec: + assert outname in out + + +@pytest.mark.parametrize("workflow_name", ["nested_workflow", "workflow_inception"]) +@pytest.mark.anyio +async def test_composable_workflow( + workflow_name: str, + tmp_path: Path, + fake_ops_dir: str, + fake_workflows_dir: str, +): + user_input = FakeType("fake workflow execution") + + runner = build_workflow_runner( + tmp_path, get_fake_workflow_path(workflow_name), fake_ops_dir, fake_workflows_dir + ) + out = await runner.run({k: serialize(user_input) for k in runner.workflow.inputs_spec}) + for outname in runner.workflow.output_spec: + assert outname in out + + +@pytest.mark.anyio +async def test_ordered_times_in_workflow( + tmp_path: Path, + fake_ops_dir: str, + fake_workflows_dir: str, +): + state: Dict[str, Tuple[WorkflowChange, datetime]] = {} + + runner = build_workflow_runner( + tmp_path, get_fake_workflow_path("nested_workflow"), fake_ops_dir, fake_workflows_dir + ) + await runner.run({k: serialize(FakeType("test")) for k in runner.workflow.inputs_spec}) + + previous = None + for task in (t for t in state.keys() if t.startswith("t")): + if previous is None: + previous = state[task] + continue + assert previous[-1] < state[task][-1] + + +@pytest.mark.anyio +async def test_fan_out_single_element(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): + spec = WorkflowParser.parse( + get_fake_workflow_path("fan_out_and_in"), 
fake_ops_dir, fake_workflows_dir + ) + tmp_asset_path = os.path.join(str(tmp_path), "assets") + storage_spec = LocalStorageConfig( + local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) + ) + factory_spec = OperationFactoryConfig(storage_spec, AzureSecretProviderConfig()) + for num_items in (1, 5): + spec.tasks["to_list"].parameters["num_items"] = num_items + workflow = Workflow(spec) + io_mapper = WorkflowIOHandler(workflow) + runner = LocalWorkflowRunner.build( + workflow, + io_mapper=io_mapper, + factory_spec=factory_spec, + ) + converter = StacConverter() + x = DataVibe( + "input", + time_range=(datetime.now(), datetime.now()), + geometry={"type": "Point", "coordinates": [0.0, 0.0]}, + assets=[], + ) + out = await runner.run({"input": serialize_stac(converter.to_stac_item(x))}) + shutil.rmtree(tmp_path) # Delete the cache + assert all(len(o) == num_items for o in out.values()) + + +@pytest.mark.anyio +async def test_gather_not_parallel(tmp_path: Path, fake_ops_dir: str, fake_workflows_dir: str): + runner = build_workflow_runner( + tmp_path, get_fake_workflow_path("item_gather"), fake_ops_dir, fake_workflows_dir + ) + converter = StacConverter() + x = DataVibe( + "input", + time_range=(datetime.now(), datetime.now()), + geometry={"type": "Point", "coordinates": [0.0, 0.0]}, + assets=[], + ) + out = await runner.run( + {k: serialize_stac(converter.to_stac_item(x)) for k in runner.workflow.inputs_spec} + ) + assert len(out) == 1 + + +# TODO: Restore "remote" storage_spec after fixing CosmosDB permissions +@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) +@pytest.mark.anyio +async def test_op_run_race_condition(storage_spec: Any, fake_ops_dir: str, fake_workflows_dir: str): + runner = gen_local_runner( + storage_spec, get_fake_workflow_path("workflow_inception"), fake_ops_dir, fake_workflows_dir + ) + user_input = FakeType("fake workflow execution") + await runner.run({k: serialize(user_input) for k in 
runner.workflow.inputs_spec}) + + +@pytest.mark.parametrize("edges", [None, []]) +def test_parser_loads_workflow_with_no_edges( + edges: List[Optional[List[Any]]], fake_ops_dir: str, fake_workflows_dir: str +) -> None: + workflow_dict = WorkflowParser._load_workflow(get_fake_workflow_path("fan_out_and_in")) + workflow_dict["edges"] = edges + WorkflowParser.parse_dict(workflow_dict, fake_ops_dir, fake_workflows_dir) + + +@pytest.mark.anyio +async def test_running_workflow_with_basevibe_edges( + tmp_path: Path, + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa + SimpleStrData: Any, +): + data = StacConverter().to_stac_item(SimpleStrData("🍔")) # type: ignore + wf_input: OpIOType = {"input": serialize_stac(data)} + + tmp_asset_path = os.path.join(str(tmp_path), "assets") + storage_spec = LocalStorageConfig( + local_path=str(tmp_path), asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) + ) + + runner = gen_local_runner( + storage_spec, get_fake_workflow_path("base_base"), fake_ops_dir, fake_workflows_dir + ) + out = await runner.run(wf_input) + assert out diff --git a/src/tests/test_ops_building.py b/src/tests/test_ops_building.py new file mode 100644 index 00000000..989d7889 --- /dev/null +++ b/src/tests/test_ops_building.py @@ -0,0 +1,36 @@ +import os +from pathlib import Path +from typing import List + +import pytest + +from vibe_agent.ops import OperationFactory +from vibe_agent.storage.asset_management import LocalFileAssetManager +from vibe_common.constants import DEFAULT_OPS_DIR +from vibe_common.secret_provider import AzureSecretProvider +from vibe_dev.testing.op_tester import FakeStorage + + +@pytest.fixture +def fake_storage(tmp_path: Path) -> FakeStorage: + asset_manager = LocalFileAssetManager(str(tmp_path)) + storage = FakeStorage(asset_manager) + return storage + + +def test_all_ops_pass_sanity_check(fake_storage: FakeStorage): + not_sane = [FileNotFoundError, RuntimeError] + factory = OperationFactory(fake_storage, 
AzureSecretProvider()) + failures: List[str] = [] + for dirpath, _, filenames in os.walk(DEFAULT_OPS_DIR): + for fn in filenames: + if not fn.endswith(".yaml"): + continue + path = os.path.join(dirpath, fn) + try: + factory.build(path) + except Exception as e: + if any([isinstance(e, n) for n in not_sane]): + failures.append(fn) + print(f"Failed to build op {fn} due to {type(e)}: {e}") + assert not failures, f"Failed to build the following op(s): {', '.join(failures)}" diff --git a/src/tests/test_rest_api.py b/src/tests/test_rest_api.py new file mode 100644 index 00000000..77548ebb --- /dev/null +++ b/src/tests/test_rest_api.py @@ -0,0 +1,329 @@ +from dataclasses import asdict +from typing import Any, Dict, List, Optional, Tuple, Union, cast +from unittest.mock import MagicMock, patch +from uuid import uuid4 as uuid + +import pytest +import requests +from fastapi.testclient import TestClient + +from vibe_common.constants import CONTROL_STATUS_PUBSUB, WORKFLOW_REQUEST_PUBSUB_TOPIC +from vibe_common.messaging import WorkflowCancellationMessage +from vibe_common.statestore import StateStore +from vibe_core.data.core_types import InnerIOType +from vibe_core.data.utils import StacConverter, deserialize_stac +from vibe_core.datamodel import RunConfig, RunConfigInput, RunDetails, RunStatus +from vibe_server.href_handler import BlobHrefHandler, LocalHrefHandler +from vibe_server.server import TerravibesAPI, TerravibesProvider +from vibe_server.workflow.input_handler import build_args_for_workflow +from vibe_server.workflow.workflow import load_workflow_by_name + + +@pytest.fixture +def request_client(): + href_handler = LocalHrefHandler("/tmp") + terravibes_app = TerravibesAPI(href_handler) + client = TestClient(terravibes_app.versioned_wrapper) + yield client + + +@pytest.fixture +def request_client_with_blob(): + href_handler = BlobHrefHandler() + terravibes_app = TerravibesAPI(href_handler) + client = TestClient(terravibes_app.versioned_wrapper) + yield client + + +def 
test_list_workflows(request_client: requests.Session): + url = "/v0/workflows" + response = request_client.get(url) + + assert response.status_code == 200 + assert isinstance(response.json(), list) + assert len(response.json()) > 0 + + for wfname in response.json(): + response = request_client.get(f"{url}/{wfname}") + assert response.status_code == 200, (wfname, response.text) + assert isinstance(response.json(), dict) + fields = "name inputs outputs parameters description".split() + for k in response.json(): + assert k in fields + + +def test_get_workflow_schema(request_client: requests.Session): + url = "/v0/workflows" + response = request_client.get(url) + workflow = response.json()[0] + url = f"{url}/{workflow}" + response = request_client.get(url).json() + assert isinstance(response, dict) + assert all(k in response for k in ("name", "inputs", "outputs", "parameters", "description")) + assert isinstance(response["name"], str) + assert isinstance(response["inputs"], dict) + assert isinstance(response["outputs"], dict) + assert isinstance(response["parameters"], dict) + assert isinstance(response["description"], dict) + assert sorted(response["parameters"]) == sorted(response["description"]["parameters"]) + + +def test_generate_api_documentation_page(request_client: requests.Session): + response = request_client.get("/v0/docs") + assert response.status_code == 200 + openapi_json = request_client.get("/v0/openapi.json") + assert openapi_json.status_code == 200 + + +@pytest.mark.parametrize("params", [None, {"param1": "new_param"}]) +@patch("vibe_server.server.send", return_value="OK") +@patch.object(StateStore, "transaction") +@patch.object(StateStore, "retrieve", side_effect=lambda _: []) +@patch.object(StateStore, "retrieve_bulk", side_effect=lambda _: []) +def test_workflow_submission( + retrieve_bulk: MagicMock, + retrieve: MagicMock, + transaction: MagicMock, + send: MagicMock, + workflow_run_config: Dict[str, Any], + params: Dict[str, Any], + request_client:
requests.Session, +): + workflow_run_config["parameters"] = params + response = request_client.post("/v0/runs", json=workflow_run_config) + send.assert_called() + assert send.call_args[0][0].content.parameters == params + + assert response.status_code == 201 + assert len(transaction.call_args.args[0]) == 2 + id = response.json()["id"] + assert transaction.call_args.args[0][0]["value"][0] == id + submitted_config = asdict(transaction.call_args.args[0][1]["value"]) + # Add some tasks here + tasks = ["task1", "task2", "task3"] + submitted_config["tasks"] = tasks + retrieve_bulk.side_effect = [[submitted_config], [asdict(RunDetails()) for _ in tasks]] + response = request_client.get(f"/v0/runs/{id}") + assert response.json()["details"]["status"] == RunStatus.pending + retrieved_task_details = response.json()["task_details"] + assert len(retrieved_task_details) == len(tasks) + assert all(retrieved_task_details[t]["status"] == RunStatus.pending for t in tasks) + + retrieve_bulk.side_effect = lambda _: [ # type: ignore + asdict(transaction.call_args.args[0][1]["value"]) + ] + response = request_client.get(f"/v0/runs/?ids={id}") + assert response.status_code == 200 + assert len(response.json()) == 1 + + +@patch.object(StateStore, "retrieve", side_effect=lambda _: []) +def test_no_workflow_runs(_, request_client: requests.Session): + response = request_client.get("/v0/runs") + assert response.status_code == 200 + assert len(response.json()) == 0 + + +def test_invalid_workflow_submission( + workflow_run_config: Dict[str, Any], request_client: requests.Session +): + workflow_run_config["workflow"] = "invalid workflow" + response = request_client.post("/v0/runs", json=workflow_run_config) + assert response.status_code == 400 + + +def test_missing_field_workflow_submission( + workflow_run_config: Dict[str, Any], request_client: requests.Session +): + del workflow_run_config["user_input"] + response = request_client.post("/v0/runs", json=workflow_run_config) + assert 
response.status_code == 422 + assert response.json()["detail"][0]["type"] == "type_error" + + +@patch.object(TerravibesProvider, "submit_work", side_effect=Exception("sorry")) +@patch.object(TerravibesProvider, "update_run_state") +@patch.object(TerravibesProvider, "list_runs_from_store", return_value=[]) +def test_submit_local_workflows_with_broken_work_submission( + _, __: Any, ___: Any, workflow_run_config: Dict[str, Any], request_client: requests.Session +): + response = request_client.post("/v0/runs", json=workflow_run_config) + assert response.status_code == 500, response + + +@patch("vibe_server.server.send", return_value="OK") +@patch.object(TerravibesProvider, "submit_work") +@patch.object(StateStore, "transaction") +@patch.object(StateStore, "retrieve", side_effect=lambda _: []) +@patch.object(StateStore, "retrieve_bulk") +def test_workflow_submission_and_cancellation( + retrieve_bulk: MagicMock, + retrieve: MagicMock, + transaction: MagicMock, + _: MagicMock, + send: MagicMock, + workflow_run_config: Dict[str, Any], + request_client: requests.Session, +): + response = request_client.post("/v0/runs", json=workflow_run_config) + assert response.status_code == 201 + assert len(transaction.call_args.args[0]) == 2 + id = response.json()["id"] + assert transaction.call_args.args[0][0]["value"][0] == id + + response = request_client.post(f"/v0/runs/{id}/cancel") + assert response.status_code == 202 + assert len(transaction.call_args.args[0]) == 2 + message = send.call_args.args[0] + assert isinstance(message, WorkflowCancellationMessage) + assert str(message.run_id) == id + + send.assert_called_with( + message, "rest-api", CONTROL_STATUS_PUBSUB, WORKFLOW_REQUEST_PUBSUB_TOPIC + ) + + +@pytest.mark.parametrize("params", [None, {"param1": "new_param"}]) +@patch.object(TerravibesProvider, "submit_work") +@patch.object(TerravibesProvider, "update_run_state") +@patch.object(StateStore, "retrieve") +@patch.object(StateStore, "retrieve_bulk", side_effect=lambda _: []) 
+def test_workflow_resubmission( + retrieve_bulk: MagicMock, + retrieve: MagicMock, + update_run_state: MagicMock, + submit_work: MagicMock, + params: Optional[Dict[str, Any]], + workflow_run_config: Dict[str, Any], + request_client: requests.Session, +): + submitted_runs: List[RunConfig] = [] + first_run = {} + + def submit_work_effect(run: RunConfig): + nonlocal submitted_runs + submitted_runs.append(run) + + def update_run_state_effect(run_ids: List[str], new_run: RunConfig): + nonlocal first_run + first_run = asdict(new_run) + + submit_work.side_effect = submit_work_effect + update_run_state.side_effect = update_run_state_effect + + workflow_run_config["parameters"] = params + response = request_client.post("/v0/runs", json=workflow_run_config) + assert response.status_code == 201 + + retrieve.side_effect = [first_run, []] + response = request_client.post(f"/v0/runs/{uuid()}/resubmit") + + assert response.status_code == 201 + r1, r2 = submitted_runs + for p in ("workflow", "user_input", "parameters", "name"): + assert getattr(r1, p) == getattr(r2, p) + assert r1.id != r2.id + + +@patch.object(StateStore, "retrieve") +def test_resubmission_of_missing_run(retrieve: MagicMock, request_client: requests.Session): + def retrieve_effect(_): + raise KeyError() + + retrieve.side_effect = retrieve_effect + response = request_client.post(f"/v0/runs/{uuid()}/resubmit") + assert response.status_code == 404 + + +@patch.object(StateStore, "retrieve") +def test_cancelling_missing_run(retrieve: MagicMock, request_client: requests.Session): + def retrieve_effect(_): + raise KeyError() + + retrieve.side_effect = retrieve_effect + + response = request_client.post(f"/v0/runs/{uuid()}/cancel") + assert response.status_code == 404 + + +def test_getting_schema_of_missing_workflow(request_client: requests.Session): + response = request_client.get("/v0/workflows/i-don't-exist") + assert response.status_code == 404 + + +def 
test_build_args_for_workflow_generates_valid_output(workflow_run_config: Dict[str, Any]): + run_config = RunConfigInput(**workflow_run_config) + inputs = load_workflow_by_name(cast(str, run_config.workflow)).inputs_spec + out = build_args_for_workflow(run_config.user_input, list(inputs)) + + def genitems(values: Union[InnerIOType, List[InnerIOType]]): + if isinstance(values, list): + for e in values: + yield deserialize_stac(e) + else: + yield deserialize_stac(values) + + converter = StacConverter() + for v in genitems([v for v in out.values()]): + assert converter.from_stac_item(v) is not None + + +@pytest.mark.parametrize( + "fields_exceptions", + [ + ([], None), + (["user_input.geojson"], None), + (["user_input.geojson", "workflow"], None), + (["user_input.doesnt_exist"], KeyError), + (["something_else.doesnt_exist"], KeyError), + ], +) +def test_summarize_runs( + workflow_run_config: Dict[str, Any], fields_exceptions: Tuple[List[str], Optional[Exception]] +): + href_handler = LocalHrefHandler("/tmp") + provider = TerravibesProvider(href_handler) + fields, exception = fields_exceptions + run_config = RunConfig( + **workflow_run_config, + id=uuid(), + details=RunDetails(), + task_details={}, + spatio_temporal_json=None, + ) + if exception is not None: + with pytest.raises(exception): # type: ignore + provider.summarize_runs([run_config], fields) + else: + summary = provider.summarize_runs([run_config], fields) + print(summary) + if fields: + for field in fields: + if "doesnt" not in field: + assert field in summary[0] + + +@pytest.mark.parametrize("blob_df", [(True, type(None)), (False, int)]) +def test_system_metrics( + request_client: requests.Session, + request_client_with_blob: requests.Session, + blob_df: Tuple[bool, Any], +): + blob, df_type = blob_df + if blob: + response = request_client_with_blob.get("/v0/system-metrics") + else: + response = request_client.get("/v0/system-metrics") + + assert response.status_code == 200 + + metrics = response.json() + 
for field in "load_avg cpu_usage free_mem used_mem total_mem disk_free".split(): + assert field in metrics + + assert all(isinstance(v, float) for v in metrics["load_avg"]) + assert isinstance(metrics["cpu_usage"], float) + assert isinstance(metrics["free_mem"], int) + assert isinstance(metrics["used_mem"], int) + assert isinstance(metrics["total_mem"], int) + assert isinstance(metrics["disk_free"], df_type) diff --git a/src/tests/test_rest_api_client_integration.py b/src/tests/test_rest_api_client_integration.py new file mode 100644 index 00000000..6d7cae53 --- /dev/null +++ b/src/tests/test_rest_api_client_integration.py @@ -0,0 +1,241 @@ +from dataclasses import asdict +from datetime import datetime +from os.path import join as j +from typing import Any, Dict, Optional +from unittest.mock import MagicMock, patch +from uuid import UUID + +import pytest +from fastapi.testclient import TestClient +from shapely.geometry import Polygon + +from vibe_common.statestore import StateStore +from vibe_core.client import FarmvibesAiClient +from vibe_core.data import ADMAgSeasonalFieldInput +from vibe_core.datamodel import RunDetails +from vibe_server.href_handler import LocalHrefHandler +from vibe_server.orchestrator import WorkflowStateUpdate +from vibe_server.server import TerravibesAPI, TerravibesProvider +from vibe_server.workflow.runner import WorkflowChange +from vibe_server.workflow.spec_parser import WorkflowParser, get_workflow_dir + + +@pytest.fixture +def rest_client(): + href_handler = LocalHrefHandler("/tmp") + terravibes_app = TerravibesAPI(href_handler) + client = TestClient(terravibes_app.versioned_wrapper) + url_string = str(client.base_url) + rest_client = FarmvibesAiClient(url_string) + rest_client.session = client + rest_client.session.headers.update(rest_client.default_headers) + yield rest_client + + +@pytest.fixture +def the_polygon(): + return Polygon( + [ + [-88.068487, 37.058836], + [-88.036059, 37.048687], + [-88.012895, 37.068984], + [-88.026622, 
37.085711], + [-88.062482, 37.081461], + [-88.068487, 37.058836], + ] + ) + + +@patch("vibe_server.server.list_existing_workflows") +@patch("vibe_server.server.TerravibesProvider.list_workflows") +@pytest.mark.anyio +async def test_list_workflows( + list_workflows: MagicMock, + list_existing_workflows: MagicMock, + rest_client: FarmvibesAiClient, +): + list_workflows.return_value = list_existing_workflows.return_value = "a/b c".split() + workflows = rest_client.list_workflows() # type: ignore + assert workflows + assert len(workflows) == len(await list_workflows()) + + +@patch.object(StateStore, "retrieve", side_effect=lambda _: []) +def test_empty_list_runs(_, rest_client: FarmvibesAiClient): + runs = rest_client.list_runs() + assert not runs + + +@pytest.mark.parametrize("workflow", ["helloworld", j(get_workflow_dir(), "helloworld.yaml")]) +@pytest.mark.parametrize("params", [None, {}, {"param1": 1}]) +@patch.object(TerravibesProvider, "submit_work") +@patch.object(StateStore, "transaction") +@patch.object(StateStore, "retrieve") +@patch.object(StateStore, "retrieve_bulk") +@patch("vibe_server.server.list_existing_workflows") +@patch("vibe_server.server.build_args_for_workflow") +@patch("vibe_server.server.validate_workflow_input") +def test_submit_run( + validate: MagicMock, + build_args: MagicMock, + list_existing_workflows: MagicMock, + retrieve_bulk: MagicMock, + retrieve: MagicMock, + transaction: MagicMock, + _: MagicMock, + rest_client: FarmvibesAiClient, + the_polygon: Polygon, + params: Optional[Dict[str, Any]], + workflow: str, + fake_ops_dir: str, +): + first_retrieve_call = True + + def retrieve_side_effect(_): + nonlocal first_retrieve_call + if first_retrieve_call: + first_retrieve_call = False + return [] + return asdict(transaction.call_args.args[0][1]["value"]) + + def bulk_side_effect(_): + return [retrieve_side_effect(_)] + + retrieve.side_effect = retrieve_side_effect + retrieve_bulk.side_effect = bulk_side_effect + + 
list_existing_workflows.return_value = ["a/b", "c", "helloworld"] + with patch("vibe_server.workflow.spec_parser.DEFAULT_OPS_DIR", fake_ops_dir): + run = rest_client.run( + (workflow if "yaml" not in workflow else WorkflowParser._load_workflow(workflow)), + "test-run", + geometry=the_polygon, + time_range=(datetime(2021, 2, 1), datetime(2021, 2, 2)), + parameters=params, + ) + assert run + assert run.parameters == params + build_args.assert_called() + validate.assert_called() + + +@patch.object(TerravibesProvider, "submit_work") +@patch.object(StateStore, "transaction") +@patch.object(StateStore, "retrieve") +@patch.object(StateStore, "retrieve_bulk") +def test_submit_base_vibe_run( + retrieve_bulk: MagicMock, + retrieve: MagicMock, + transaction: MagicMock, + _: MagicMock, + rest_client: FarmvibesAiClient, +): + party_id = "fake-party-id" + seasonal_field_id = "fake-seasonal-field-id" + input_data = ADMAgSeasonalFieldInput( + party_id=party_id, + seasonal_field_id=seasonal_field_id, + ) + + first_retrieve_call = True + + def retrieve_side_effect(_): + nonlocal first_retrieve_call + if first_retrieve_call: + first_retrieve_call = False + return [] + return asdict(transaction.call_args.args[0][1]["value"]) + + def bulk_side_effect(_): + return [retrieve_side_effect(_)] + + retrieve.side_effect = retrieve_side_effect + retrieve_bulk.side_effect = bulk_side_effect + + run = rest_client.run( + "data_ingestion/admag/admag_seasonal_field", + "whatever", + input_data=input_data, + ) + assert run + + +@pytest.mark.parametrize("workflow", ["helloworld", j(get_workflow_dir(), "helloworld.yaml")]) +@pytest.mark.parametrize("params", [None, {}, {"param1": 1}]) +@patch.object(TerravibesProvider, "submit_work") +@patch.object(StateStore, "transaction") +@patch.object(StateStore, "retrieve") +@patch.object(StateStore, "retrieve_bulk") +@patch("vibe_common.statestore.StateStore.store") +@patch("vibe_server.server.list_existing_workflows") 
+@patch("vibe_server.server.build_args_for_workflow") +@patch("vibe_server.server.validate_workflow_input") +@pytest.mark.anyio +async def test_monitor_run_with_none_datetime_fields( + validate: MagicMock, + build_args: MagicMock, + list_existing_workflows: MagicMock, + store: MagicMock, + retrieve_bulk: MagicMock, + retrieve: MagicMock, + transaction: MagicMock, + _: MagicMock, + rest_client: FarmvibesAiClient, + the_polygon: Polygon, + params: Optional[Dict[str, Any]], + workflow: str, + fake_ops_dir: str, +): + first_retrieve_call = True + run_config: Optional[Dict[str, Any]] = None + + def store_side_effect(_: Any, obj: Any): + nonlocal run_config + run_config = obj + + def retrieve_side_effect(_): + nonlocal first_retrieve_call, run_config + if first_retrieve_call: + first_retrieve_call = False + return [] + + if run_config is None: + run_config = asdict(transaction.call_args.args[0][1]["value"]) + if not run_config["task_details"]: + run_config["task_details"]["hello"] = asdict(RunDetails()) + return run_config + + def bulk_side_effect(_): + return [retrieve_side_effect(_)] + + store.side_effect = store_side_effect + retrieve.side_effect = retrieve_side_effect + retrieve_bulk.side_effect = bulk_side_effect + + list_existing_workflows.return_value = ["a/b", "c", "helloworld"] + with patch("vibe_server.workflow.spec_parser.DEFAULT_OPS_DIR", fake_ops_dir): + run = rest_client.run( + (workflow if "yaml" not in workflow else WorkflowParser._load_workflow(workflow)), + "test-run", + geometry=the_polygon, + time_range=(datetime(2021, 2, 1), datetime(2021, 2, 2)), + parameters=params, + ) + assert run + assert run.parameters == params + build_args.assert_called() + validate.assert_called() + + updater = WorkflowStateUpdate(UUID(run.id)) + await updater(WorkflowChange.WORKFLOW_STARTED, tasks=["hello"]) + + assert run.task_details + + await updater(WorkflowChange.WORKFLOW_FINISHED) + run.monitor(1, 0) + + +def test_system_metrics(rest_client: FarmvibesAiClient): + 
metrics = rest_client.get_system_metrics() + assert metrics + assert metrics["disk_free"] is not None diff --git a/src/tests/test_subprocess_client.py b/src/tests/test_subprocess_client.py new file mode 100644 index 00000000..1d1ce342 --- /dev/null +++ b/src/tests/test_subprocess_client.py @@ -0,0 +1,77 @@ +import os +from datetime import datetime, timezone +from typing import Tuple +from unittest.mock import Mock, patch + +import pytest +from shapely.geometry import Polygon + +from vibe_core.datamodel import RunStatus +from vibe_dev.client.subprocess_client import SubprocessClient, get_default_subprocess_client + +HERE = os.path.dirname(os.path.abspath(__file__)) + + +@pytest.fixture +def input_polygon() -> Polygon: + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + (-88.035931592028305, 37.048441375086092), + (-88.068120429075847, 37.058833638440767), + (-88.062073563448919, 37.081397673802059), + ] + + return Polygon(polygon_coords) + + +@pytest.fixture +def workflow_name() -> str: + return "helloworld" + + +@pytest.fixture +def workflow_path() -> str: + return os.path.join(HERE, "..", "..", "workflows", "helloworld.yaml") + + +@pytest.fixture +def time_range() -> Tuple[datetime, datetime]: + return ( + datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), + datetime(year=2021, month=2, day=11, tzinfo=timezone.utc), + ) + + +@patch("vibe_agent.worker.Worker.is_workflow_complete", return_value=False) +@pytest.mark.anyio +async def test_local_client_with_workflow_name( + _: Mock, + input_polygon: Polygon, + workflow_name: str, + tmp_path: str, + time_range: Tuple[datetime, datetime], + capsys, # type: ignore +): + client: SubprocessClient = get_default_subprocess_client(tmp_path) + with capsys.disabled(): + output = await client.run(workflow_name, input_polygon, time_range) + assert output.status == 
RunStatus.done + + +@patch("vibe_agent.worker.Worker.is_workflow_complete", return_value=False) +@pytest.mark.anyio +async def test_local_client_with_workflow_path( + _: Mock, + input_polygon: Polygon, + workflow_path: str, + tmp_path: str, + time_range: Tuple[datetime, datetime], + capsys, # type: ignore +): + client: SubprocessClient = get_default_subprocess_client(tmp_path) + with capsys.disabled(): + output = await client.run(workflow_path, input_polygon, time_range) + assert output.status == RunStatus.done diff --git a/src/tests/workflows_integration/__init__.py b/src/tests/workflows_integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/tests/workflows_integration/test_helloworld_integration.py b/src/tests/workflows_integration/test_helloworld_integration.py new file mode 100644 index 00000000..cca9ecc6 --- /dev/null +++ b/src/tests/workflows_integration/test_helloworld_integration.py @@ -0,0 +1,113 @@ +import os +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +import pytest +from hydra_zen import MISSING, builds, instantiate +from shapely.geometry import Polygon, mapping + +from vibe_agent.storage import Storage +from vibe_common.input_handlers import gen_stac_item_from_bounds +from vibe_common.schemas import CacheInfo, ItemDict, OpRunId + +HERE = os.path.dirname(os.path.abspath(__file__)) +WORKFLOW_PATH = os.path.join(HERE, "..", "..", "..", "workflows", "helloworld.yaml") + + +class DipatchedStorage(Storage): + def __init__(self, original: Storage): + self.original = original + + def retrieve_output_from_input_if_exists(self, *args: Any): + ret = self.original.retrieve_output_from_input_if_exists(*args) + assert ret is not None + return ret + + async def retrieve_output_from_input_if_exists_async( + self, cache_info: CacheInfo, **kwargs: Any + ) -> Optional[ItemDict]: + ret = await self.original.retrieve_output_from_input_if_exists_async(cache_info, **kwargs) + assert ret is not None 
+ return ret + + def store(self, *args: Any): + return self.original.store(*args) + + def __getattr__(self, name: str): + return getattr(self.original, name) + + def remove(self, op_run_id: OpRunId): + self.original.remove(op_run_id) + + +PatchedStorageConfig = builds( + DipatchedStorage, + original=MISSING, + zen_dataclass={ + "module": "tests.workflows_integration.test_helloworld_integration", + "cls_name": "PatchedStorageConfig", + }, +) + + +@pytest.fixture +def helloworld_input() -> Dict[str, Any]: + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + (-88.035931592028305, 37.048441375086092), + (-88.068120429075847, 37.058833638440767), + (-88.062073563448919, 37.081397673802059), + ] + polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore + start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) + + return gen_stac_item_from_bounds(polygon, start_date, end_date) + + +# TODO: add "remote" to the list of storage_spec +@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) +@pytest.mark.anyio +async def test_helloworld_workflow( + storage_spec: Any, + helloworld_input: List[Dict[str, Any]], + workflow_test_helper, # type: ignore +): + runner = workflow_test_helper.gen_workflow(WORKFLOW_PATH, storage_spec) + result = await runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) + + workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result) + + +# TODO: add "remote" to the list of storage_spec +@pytest.mark.parametrize("storage_spec", ["local"], indirect=True) +@pytest.mark.anyio +async def test_helloworld_cache( + storage_spec: Any, + helloworld_input: List[Dict[str, Any]], + workflow_test_helper, # type: ignore + tmpdir: str, +): + runner = 
workflow_test_helper.gen_workflow(WORKFLOW_PATH, storage_spec) + + result_first_run = await runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) + workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result_first_run) + + runner = workflow_test_helper.gen_workflow( + WORKFLOW_PATH, PatchedStorageConfig(original=instantiate(storage_spec)) + ) + result_second_run = await runner.run({k: helloworld_input for k in runner.workflow.inputs_spec}) + + workflow_test_helper.verify_workflow_result(WORKFLOW_PATH, result_second_run) + + # Need to improve this test to be agnostic to the order of elements in the list + assert result_first_run.keys() == result_second_run.keys() + for k in result_first_run.keys(): + out1 = result_first_run[k] + out2 = result_second_run[k] + assert len(out1) == len(out2) + assert out1["id"] == out2["id"] + assert out1["assets"].keys() == out2["assets"].keys() diff --git a/src/tests_local_cluster/expected.tif b/src/tests_local_cluster/expected.tif new file mode 100644 index 00000000..569a8316 --- /dev/null +++ b/src/tests_local_cluster/expected.tif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098c3513c360b6d7dd69a6c99d08ec7e099853af02658f8e0d5f0ff545d9f160 +size 7300 diff --git a/src/tests_local_cluster/test_cluster_integration.py b/src/tests_local_cluster/test_cluster_integration.py new file mode 100644 index 00000000..753361b8 --- /dev/null +++ b/src/tests_local_cluster/test_cluster_integration.py @@ -0,0 +1,190 @@ +import codecs +import getpass +import os +import shutil +from datetime import datetime, timezone +from typing import Callable, Union + +import numpy as np +import pytest +import rasterio +from shapely.geometry import Polygon + +from vibe_core.cli.helper import execute_cmd +from vibe_core.cli.local import find_redis_master +from vibe_core.cli.osartifacts import OSArtifacts +from vibe_core.cli.wrappers import KubectlWrapper +from vibe_core.client import FarmvibesAiClient, VibeWorkflowRun, 
get_default_vibe_client +from vibe_core.datamodel import RunStatus + +HOME = os.path.expanduser("~") +DEFAULT_FARMVIBES_CACHE_DATA_DIR = os.path.join( + os.path.join(HOME, ".cache", "farmvibes-ai"), "data" +) + +DELETE_KEY_WITH_PREFIX_CMD = 'redis-cli -a {password} KEYS "{key_prefix}" 2> /dev/null | xargs redis-cli -a {password} DEL 2> /dev/null' # noqa + +RUN_KEY_PREFIX = "run:*" +OP_KEY_PREFIX = "op:*" +ASSET_KEY_PREFIX = "asset:*" + + +class KubectlRedisWrapper(KubectlWrapper): + def __init__(self): + self.cluster_name = os.environ.get( + "FARMVIBES_AI_CLUSTER_NAME", + f"farmvibes-ai-{getpass.getuser()}", + ) + super().__init__(os_artifacts=OSArtifacts(), cluster_name=self.cluster_name) + + def delete_keys_with_prefix(self, prefix: str) -> Union[str, None]: + result = self.get_secret("redis", ".data.redis-password", self.cluster_name) + redis_password = codecs.decode(result.encode(), "base64").decode() + master_pod, redis_master, kind = find_redis_master(self) + bash_command = DELETE_KEY_WITH_PREFIX_CMD.format(password=redis_password, key_prefix=prefix) + cmd = [self.os_artifacts.kubectl, "exec", master_pod, "--", "bash", "-c", bash_command] + + retries = 3 + output = None + + for _ in range(retries): + try: + output = execute_cmd(cmd, censor_command=True) + break + except ValueError: + continue + + return output + + +def clear_cache_and_cache_metadata(): + if os.path.exists(DEFAULT_FARMVIBES_CACHE_DATA_DIR): + shutil.rmtree(DEFAULT_FARMVIBES_CACHE_DATA_DIR) + + redis_via_kubectl = KubectlRedisWrapper() + redis_via_kubectl.delete_keys_with_prefix(RUN_KEY_PREFIX) + redis_via_kubectl.delete_keys_with_prefix(OP_KEY_PREFIX) + redis_via_kubectl.delete_keys_with_prefix(ASSET_KEY_PREFIX) + + +def ensure_equal_output_images(expected_path: str, actual_path: str): + with rasterio.open(expected_path) as src: + expected_ar = ( + src.read() + ) # Actually read the data. 
This is a numpy array with shape (bands, height, width) + expected_profile = src.profile # Metadata about geolocation, compression, and tiling (dict) + with rasterio.open(actual_path) as src: + actual_ar = src.read() + actual_profile = src.profile + assert np.allclose(expected_ar, actual_ar) + assert all(expected_profile[k] == actual_profile[k] for k in expected_profile) + + +def num_files_in_cache(): + num_files = 0 + for dirpath, dirs, files in os.walk(DEFAULT_FARMVIBES_CACHE_DATA_DIR): + num_files += len(files) + return num_files + + +@pytest.fixture +def helloworld_workflow_fixture(): + clear_cache_and_cache_metadata() + + def run_helloworld_workflow(): + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + (-88.035931592028305, 37.048441375086092), + (-88.068120429075847, 37.058833638440767), + (-88.062073563448919, 37.081397673802059), + ] + polygon = Polygon(polygon_coords) + start_date = datetime(year=2021, month=2, day=1, tzinfo=timezone.utc) + end_date = datetime(year=2021, month=2, day=11, tzinfo=timezone.utc) + client: FarmvibesAiClient = get_default_vibe_client() + + run = client.run( + "helloworld", + "test_hello", + geometry=polygon, + time_range=(start_date, end_date), + ) + + run.block_until_complete(30) + return run + + return run_helloworld_workflow + + +def test_helloworld_once(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): + run = helloworld_workflow_fixture() + + assert run.status == RunStatus.done, f"Workflow did not finish successfully. 
{run.task_details}" + assert run.output is not None, "Workflow did not produce output" + + ensure_equal_output_images( + os.path.join(os.path.dirname(__file__), "expected.tif"), + run.output["raster"][0].assets[0].local_path, # type: ignore + ) + + +def test_helloworld_workflow_twice(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): + # when run twice result should be cached and output should be the same file + + run1 = helloworld_workflow_fixture() + assert ( + run1.status == RunStatus.done + ), f"Workflow did not finish successfully. {run1.task_details}" + assert run1.output is not None, "Workflow did not produce output" + run1_raster_path = run1.output["raster"][0].assets[0].local_path # type: ignore + + run2 = helloworld_workflow_fixture() + assert ( + run2.status == RunStatus.done + ), f"Workflow did not finish successfully. {run2.task_details}" + assert run2.output is not None, "Workflow did not produce output" + run2_raster_path = run2.output["raster"][0].assets[0].local_path # type: ignore + + assert run1_raster_path == run2_raster_path + + +def test_run_helloworld_once_delete(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): + run = helloworld_workflow_fixture() + assert run.status == RunStatus.done, f"Workflow did not finish successfully. {run.task_details}" + assert run.output is not None, "Workflow did not produce output" + assert os.path.exists(run.output["raster"][0].assets[0].local_path) # type: ignore + + run.delete() + run.block_until_deleted(20) + assert ( + run.status == RunStatus.deleted + ), f"Workflow was not deleted successfully. {run.task_details}" + assert 0 == num_files_in_cache() + + +def test_run_helloworld_twice_delete(helloworld_workflow_fixture: Callable[[], VibeWorkflowRun]): + run1 = helloworld_workflow_fixture() + assert ( + run1.status == RunStatus.done + ), f"Workflow did not finish successfully. 
{run1.task_details}" + assert run1.output is not None, "Workflow did not produce output" + + run2 = helloworld_workflow_fixture() + assert ( + run2.status == RunStatus.done + ), f"Workflow did not finish successfully. {run2.task_details}" + + num_files_in_cache_before_delete = num_files_in_cache() + + run1.delete() + run1.block_until_deleted(20) + + assert ( + run1.status == RunStatus.deleted + ), f"Workflow was not deleted successfully. {run1.task_details}" + + assert num_files_in_cache_before_delete == num_files_in_cache() + assert os.path.exists(run2.output["raster"][0].assets[0].local_path) # type: ignore diff --git a/src/vibe_agent/setup.py b/src/vibe_agent/setup.py new file mode 100644 index 00000000..7291bc44 --- /dev/null +++ b/src/vibe_agent/setup.py @@ -0,0 +1,37 @@ +from setuptools import find_packages, setup + +setup( + name="vibe_agent", + version="0.0.1", + author="Microsoft", + author_email="terravibes@microsoft.com", + description="TerraVibes Geospatial Platform Package - vibe package.", + license="Proprietary", + keywords="terravibes geospatial", + packages=find_packages(exclude=["tests*"]), + install_requires=[ + "aiorwlock~=1.3.0", + "azure-cosmos~=4.5.0", + "pystac~=1.6.0", + "azure-identity~=1.14.0", + "azure-storage-blob>=12.5.0", + "httpx~=0.24.1", + "shapely>=1.7.1", + "PyYAML~=6.0.1", + "pebble~=4.6.3", + "grpcio~=1.53.0", + "dapr==1.13.0", + "dapr-ext-grpc~=1.12.0", + "redis~=4.6.0", + "hiredis~=2.2.0", + "vibe-core", + "vibe-common", + ], + entry_points={ + "console_scripts": [ + "vibe-worker = vibe_agent.launch_worker:main", + "vibe-cache = vibe_agent.launch_cache:main", + "vibe-data-ops = vibe_agent.launch_data_ops:main", + ] + }, +) diff --git a/src/vibe_agent/tests/conftest.py b/src/vibe_agent/tests/conftest.py new file mode 100644 index 00000000..f045d7c5 --- /dev/null +++ b/src/vibe_agent/tests/conftest.py @@ -0,0 +1,83 @@ +# flake8: noqa +import os +import uuid +from tempfile import TemporaryDirectory +from typing import Any, Dict 
+ +import pytest + +from vibe_agent.ops import OperationFactoryConfig +from vibe_agent.worker import Worker +from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 +from vibe_dev.testing import anyio_backend # type: ignore # noqa +from vibe_dev.testing.workflow_fixtures import ( + SimpleStrData, + simple_op_spec, + workflow_execution_message, +) # type: ignore # noqa + +FILE_CONTENTS = "SAMPLE FILE CONTENTS FOR TESTING PURPOSES" + + +@pytest.fixture(scope="module") +def file_contents(): + return FILE_CONTENTS + + +@pytest.fixture(scope="module") +def local_file(file_contents: str): + with TemporaryDirectory() as tmp_dir: + filename = f"{uuid.uuid4()}.txt" + filepath = os.path.join(tmp_dir, filename) + with open(os.path.join(tmp_dir, filename), "w") as f: + f.write(file_contents) + yield filepath + + +@pytest.fixture +def local_file_ref(request: pytest.FixtureRequest, local_file: str): + ref_type: str = request.param # type: ignore + if ref_type == "uri": + return f"file://{local_file}" + elif ref_type == "path": + return local_file + else: + raise ValueError(f"Invalid reference type {ref_type}") + + +@pytest.fixture +def op_yaml() -> Dict[str, Any]: + return { + "name": "fake", + "inputs": { + "user_data": "List[DataVibe]", + }, + "output": { + "processed_data": "List[DataVibe]", + }, + "parameters": { + "fake_param": 1, + "fake_another_param": {"fake_nested": 2, "fake_nested_too": 3}, + }, + "entrypoint": {"file": "op.py", "callback_builder": "callback_builder"}, + } + + +@pytest.fixture +def op_foo() -> str: + foo_str: str = """ +def print_args(user_data): + return user_data + +def callback_builder(**kw): + return print_args + """ + return foo_str + + +@pytest.fixture +def non_existing_file(request: pytest.FixtureRequest): + location = request.param # type:ignore + if location == "local": + return "/nodir/nodir2/does_not_exist.txt" + raise ValueError(f"Expected 'local' or 'remote' request, got {location}") diff --git 
a/src/vibe_agent/tests/ops/test_dependencies_integration.py b/src/vibe_agent/tests/ops/test_dependencies_integration.py new file mode 100644 index 00000000..7e29f26b --- /dev/null +++ b/src/vibe_agent/tests/ops/test_dependencies_integration.py @@ -0,0 +1,56 @@ +import pytest + +from vibe_agent.ops import EntryPointDict, OperationDependencyResolver, OperationSpec +from vibe_core.data import DataVibe, TypeDictVibe +from vibe_core.datamodel import TaskDescription + + +@pytest.fixture +def operation_spec(): + return OperationSpec( + name="fake", + inputs_spec=TypeDictVibe({"vibe_input": DataVibe}), # type: ignore + output_spec=TypeDictVibe({"processed_data": DataVibe}), + parameters={}, + entrypoint=EntryPointDict(file="fake.py", callback_builder="fake_callback"), + root_folder="/tmp", + description=TaskDescription(), + ) + + +def test_resolver_empty_dependency(operation_spec: OperationSpec): + resolver = OperationDependencyResolver() + empty_dependency = resolver.resolve(operation_spec) + + assert len(empty_dependency) == 0 + + +def test_resolver_valid_dependency(operation_spec: OperationSpec): + operation_spec.parameters = {"param": 1, "another_param": "test"} + operation_spec.dependencies = {"parameters": ["param", "another_param"]} + + resolver = OperationDependencyResolver() + dependencies = resolver.resolve(operation_spec) + target_dependencoes = {"parameters": operation_spec.parameters} + + assert target_dependencoes == dependencies + + +def test_resolver_valid_partial_dependency(operation_spec: OperationSpec): + operation_spec.parameters = {"param": 1, "another_param": "test"} + operation_spec.dependencies = {"parameters": ["another_param"]} + + resolver = OperationDependencyResolver() + dependencies = resolver.resolve(operation_spec) + target_dependencies = {"parameters": {"another_param": "test"}} + + assert target_dependencies == dependencies + + +def test_resolver_invalid_dependency(operation_spec: OperationSpec): + operation_spec.parameters = {"param": 1, 
"another_param": "test"} + operation_spec.dependencies = {"parameters": ["unexisting_param"]} + + resolver = OperationDependencyResolver() + with pytest.raises(ValueError): + resolver.resolve(operation_spec) diff --git a/src/vibe_agent/tests/ops/test_op_cache_builder.py b/src/vibe_agent/tests/ops/test_op_cache_builder.py new file mode 100644 index 00000000..3b66f28c --- /dev/null +++ b/src/vibe_agent/tests/ops/test_op_cache_builder.py @@ -0,0 +1,182 @@ +import datetime +import random +from dataclasses import dataclass +from typing import Any, Dict, List, Union + +from pystac import Item +from pytest import fixture +from shapely.geometry import Polygon, mapping + +from vibe_common.schemas import CacheInfo, ItemDict, OpResolvedDependencies +from vibe_core.data.core_types import BaseVibe + + +@dataclass +class TestVibe(BaseVibe): + a: int + b: str + + +@fixture +def item_dict(): + num_items = 5 + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + ] + polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore + timestamp = datetime.datetime.now(datetime.timezone.utc) + items = [ + Item(id=str(i), geometry=polygon, datetime=timestamp, properties={}, bbox=None) + for i in range(num_items) + ] + single_item = Item( + id=str(num_items), geometry=polygon, datetime=timestamp, properties={}, bbox=None + ) + base_items = [TestVibe(i, f"{i}") for i in range(num_items)] + single_base = TestVibe(num_items, f"{num_items}") + + output_dict = { + "list_input": items, + "single_input": single_item, + "list_base": base_items, + "single_base": single_base, + } + + return output_dict + + +@fixture +def item_dict_hashes() -> Dict[str, Union[str, List[str], Dict[str, Any]]]: + return { + "vibe_source_items": { + "list_input": ["0", "1", "2", "3", "4"], + "single_input": "5", + "list_base": [ + 
"371c8cb9ac0a9f7d31fd0ab9d1e59efe3a5d98854e86b6bfa3207ccf4e6dfbf6", + "3d15b923441e57a7b3f9dcc93f43d8b41620b3dba7d5c4be78bf0b2a597006d2", + "c5e1ca033cc639402b7352606e8a00676636287f437739a1c773440df76d2799", + "cf3b5755718f90ffe7cdf7b27bd41da19158ea4d1fefdc7aca188bc9dcac7f19", + "eab1e3a83e5b227da228fefdf633ce9a05b12dcdb59d6739f7d1dddeb51d712f", + ], + "single_base": "66756d10b406f729019b8a049f02e293b7f7e0e3b22f613f4c7024f732e5ee11", + }, + "vibe_op_parameters": {"parameters": {"dep": 1, "another_dep": "bla"}}, + "vibe_op_version": "1", + "vibe_op_hash": "5daf389eaad4c50533c2b1ace0b6f551f1a3b9236ec35f1fa3e5a5ab11b68a32", + } + + +@fixture +def op_dependencies(): + return {"parameters": {"dep": 1, "another_dep": "bla"}} + + +def test_stable_hashes( + item_dict: ItemDict, + op_dependencies: OpResolvedDependencies, + item_dict_hashes: Dict[str, Union[str, List[str], Dict[str, Any]]], +): + cache_info = CacheInfo("test_op", "1.0", item_dict, op_dependencies) + storage_dict = cache_info.as_storage_dict() + for k, v in item_dict_hashes.items(): + assert storage_dict[k] == v + + +def test_cache_builder(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): + version = "1.3" + cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) + + assert cache_info.version == version[0] + + for k, v in item_dict.items(): + if isinstance(v, list): + target_ids = sorted(CacheInfo._compute_or_extract_id(v)) + for target_id, input_id in zip(target_ids, cache_info.ids[k]): + assert target_id == input_id + else: + assert cache_info.ids[k] == CacheInfo._compute_or_extract_id(v) + + +def test_cache_builder_hash(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): + version = "1.3" + cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) + cache_info_repeat = CacheInfo("test_op", version[0], item_dict, op_dependencies) + + assert cache_info.hash == cache_info_repeat.hash + + +def test_hash_order_invariances(item_dict: ItemDict, op_dependencies: 
OpResolvedDependencies): + version = "1.3" + cache_info = CacheInfo("test_op", version, item_dict, op_dependencies) + + # Shuffling input ids + random.shuffle(item_dict["list_input"]) # type: ignore + random.shuffle(item_dict["list_base"]) # type: ignore + cache_info_shuffled = CacheInfo("test_op", version, item_dict, op_dependencies) + + assert cache_info.hash == cache_info_shuffled.hash + + +def test_hash_version_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): + cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + cache_info_repeat = CacheInfo("test_op", "2.5", item_dict, op_dependencies) + + assert cache_info.hash != cache_info_repeat.hash + + +def test_hash_source_id_dependency_single( + item_dict: ItemDict, op_dependencies: OpResolvedDependencies +): + cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + item_dict["single_input"].id = str(10) # type: ignore + cache_info2 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + item_dict["single_base"].a = 2 # type: ignore + cache_info3 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + + assert cache_info.hash != cache_info2.hash + assert cache_info.hash != cache_info3.hash + assert cache_info2.hash != cache_info3.hash + + +def test_hash_source_id_dependency_list( + item_dict: ItemDict, op_dependencies: OpResolvedDependencies +): + cache_info = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + item_dict["list_input"][-1].id = str(10) # type: ignore + cache_info2 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + item_dict["list_base"][-1].b = str(10) # type: ignore + cache_info3 = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + + assert cache_info.hash != cache_info2.hash + assert cache_info.hash != cache_info3.hash + assert cache_info2.hash != cache_info3.hash + + +def test_hash_source_name_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): + cache_info = CacheInfo("test_op", "1.3", 
item_dict, op_dependencies) + i = item_dict.pop("list_input") + item_dict["different_name_input"] = i + cache_info_repeat = CacheInfo("test_op", "1.3", item_dict, op_dependencies) + + assert cache_info.hash != cache_info_repeat.hash + + +def test_hash_parameter_dependency(item_dict: ItemDict, op_dependencies: OpResolvedDependencies): + op_version = "1.3" + cache_info = CacheInfo("test_op", op_version, item_dict, op_dependencies) + op_dependencies["parameters"]["dep"] = 2 + cache_info_repeat = CacheInfo("test_op", op_version, item_dict, op_dependencies) + + assert cache_info.hash != cache_info_repeat.hash + + +def test_hash_gen_basevibe(): + x = CacheInfo._compute_or_extract_id(TestVibe(1, "1")) + y = CacheInfo._compute_or_extract_id(TestVibe(2, "1")) + z = CacheInfo._compute_or_extract_id(TestVibe(1, "2")) + assert x != y + assert x != z + assert y != z diff --git a/src/vibe_agent/tests/ops/test_op_parser.py b/src/vibe_agent/tests/ops/test_op_parser.py new file mode 100644 index 00000000..278c1e7b --- /dev/null +++ b/src/vibe_agent/tests/ops/test_op_parser.py @@ -0,0 +1,51 @@ +import os +from typing import Any, Dict + +from vibe_agent.ops import OperationParser, OperationSpec +from vibe_core.file_utils import write_yaml + + +def compare_spec_yaml(spec: OperationSpec, op_yaml: Dict[str, Any], root_folder: str): + assert spec.dependencies == op_yaml.get("dependencies", {}) + assert spec.version == op_yaml.get("version", "1.0") + assert spec.parameters == op_yaml["parameters"] + assert spec.name == op_yaml["name"] + assert spec.root_folder == root_folder + assert spec.entrypoint["file"] == op_yaml["entrypoint"]["file"] + assert spec.entrypoint["callback_builder"] == op_yaml["entrypoint"]["callback_builder"] + assert op_yaml["inputs"].keys() == spec.inputs_spec.keys() + + +def test_parser_only_required(tmpdir: str, op_yaml: Dict[str, Any]): + op_yaml_file = os.path.join(tmpdir, "fake.yaml") + write_yaml(op_yaml_file, op_yaml) + spec = 
OperationParser().parse(op_yaml_file) + compare_spec_yaml(spec, op_yaml, tmpdir) + + +def test_parser_version(tmpdir: str, op_yaml: Dict[str, Any]): + op_yaml_file = os.path.join(tmpdir, "fake.yaml") + op_yaml["version"] = "2.5" + write_yaml(op_yaml_file, op_yaml) + spec = OperationParser().parse(op_yaml_file) + compare_spec_yaml(spec, op_yaml, tmpdir) + + +def test_parser_dependencies(tmpdir: str, op_yaml: Dict[str, Any]): + op_yaml_file = os.path.join(tmpdir, "fake.yaml") + op_yaml["dependencies"] = {"parameters": ["fake_param"]} + write_yaml(op_yaml_file, op_yaml) + spec = OperationParser().parse(op_yaml_file) + compare_spec_yaml(spec, op_yaml, tmpdir) + + +def test_parser_empty_fields(tmpdir: str, op_yaml: Dict[str, Any]): + op_yaml_file = os.path.join(tmpdir, "fake.yaml") + op_yaml["dependencies"] = None + op_yaml["version"] = None + op_yaml["parameters"] = None + write_yaml(op_yaml_file, op_yaml) + spec = OperationParser().parse(op_yaml_file) + assert spec.parameters == {} + assert spec.dependencies == {} + assert spec.version == "1.0" diff --git a/src/vibe_agent/tests/ops/test_operation.py b/src/vibe_agent/tests/ops/test_operation.py new file mode 100644 index 00000000..f472c708 --- /dev/null +++ b/src/vibe_agent/tests/ops/test_operation.py @@ -0,0 +1,110 @@ +import os +from datetime import datetime +from typing import Any, Callable +from unittest.mock import MagicMock, patch + +import pytest +from shapely import geometry as shpg + +from vibe_agent.ops import Operation, OperationFactory +from vibe_agent.ops_helper import OpIOConverter +from vibe_agent.storage.local_storage import LocalResourceExistsError +from vibe_common.schemas import CacheInfo, OperationParser +from vibe_core.data import DataVibe +from vibe_core.data.utils import StacConverter +from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir # type: ignore # noqa +from vibe_dev.testing.op_tester import FakeStorage + + +@patch.object(OperationFactory, "resolve_secrets") +def 
test_callback_output_mismatch_fails(resolve_secrets: MagicMock, fake_ops_dir: str): # noqa + op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) + resolve_secrets.return_value = op_spec.parameters + factory = OperationFactory(None, None) # type: ignore + op = factory.build(op_spec) + + now = datetime.now() + x = DataVibe( + id="1", time_range=(now, now), geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), assets=[] + ) + op._call_validate_op(user_data=x) # type: ignore + + def mock_callback(callback: Callable[..., Any]): + def fun(*args: Any, **kwargs: Any): + return {"wrong": None, **callback(*args, **kwargs)} + + return fun + + op.callback = mock_callback(op.callback) # type: ignore + with pytest.raises(RuntimeError): + op._call_validate_op(user_data=x) # type: ignore + + +@patch.object(Operation, "_call_validate_op") +@patch.object(FakeStorage, "retrieve_output_from_input_if_exists") +@patch.object(OpIOConverter, "serialize_output") +@patch.object(OpIOConverter, "deserialize_input") +@patch.object(OperationFactory, "resolve_secrets") +def test_op_cache_check_before_callback( + resolve_secrets: MagicMock, + deserialize_input: MagicMock, + serialize_output: MagicMock, + retrieve_cache: MagicMock, + call_validate: MagicMock, + fake_ops_dir: str, # noqa +): + deserialize_input.return_value = {"stac": 1} + serialize_output.side_effect = lambda x: x + cached_output = {"cached_before": "no callback 😊"} + retrieve_cache.return_value = cached_output + op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) + resolve_secrets.return_value = op_spec.parameters + factory = OperationFactory(FakeStorage(None), None) # type:ignore + op = factory.build(op_spec) + cache_info = CacheInfo("test-op", "1.0", {}, {}) + object.__setattr__(cache_info, "hash", "cache_before") + out = op.run(None, cache_info) # type:ignore + assert out == cached_output + deserialize_input.assert_called_once() + serialize_output.assert_called_once() + 
retrieve_cache.assert_called_once() + call_validate.assert_not_called() + + +@patch.object(FakeStorage, "store") +@patch.object(StacConverter, "from_stac_item") +@patch.object(Operation, "_call_validate_op") +@patch.object(FakeStorage, "retrieve_output_from_input_if_exists") +@patch.object(OpIOConverter, "serialize_output") +@patch.object(OpIOConverter, "deserialize_input") +@patch.object(OperationFactory, "resolve_secrets") +def test_op_cache_check_after_callback( + resolve_secrets: MagicMock, + deserialize_input: MagicMock, + serialize_output: MagicMock, + retrieve_cache: MagicMock, + call_validate: MagicMock, + from_stac_item: MagicMock, + store: MagicMock, + fake_ops_dir: str, # noqa +): + deserialize_input.return_value = {"stac": 1} + serialize_output.side_effect = lambda x: x + cached_output = {"cached_after": "yes callback 😔"} + retrieve_cache.side_effect = [None, cached_output] + call_validate.return_value = {"out": "repeated callback output"} + from_stac_item.side_effect = lambda x: x + store.side_effect = LocalResourceExistsError() + op_spec = OperationParser().parse(os.path.join(fake_ops_dir, "fake/item_item.yaml")) + resolve_secrets.return_value = op_spec.parameters + factory = OperationFactory(FakeStorage(None), None) # type:ignore + op = factory.build(op_spec) + cache_info = CacheInfo("test-op", "1.0", {}, {}) + object.__setattr__(cache_info, "hash", "cache_before") + out = op.run(None, cache_info) # type:ignore + assert out == cached_output + deserialize_input.assert_called_once() + serialize_output.assert_called_once() + # Cache retrieval should be called once before the callback, and then again after + assert retrieve_cache.call_count == 2 + call_validate.assert_called_once() diff --git a/src/vibe_agent/tests/test_cache_metadata_store.py b/src/vibe_agent/tests/test_cache_metadata_store.py new file mode 100644 index 00000000..92d08f68 --- /dev/null +++ b/src/vibe_agent/tests/test_cache_metadata_store.py @@ -0,0 +1,306 @@ +import asyncio +import uuid 
+from dataclasses import asdict +from datetime import datetime +from typing import Any, Dict, Set, Tuple +from unittest.mock import AsyncMock, Mock, call, patch + +import pytest + +from vibe_agent.cache_metadata_store import RedisCacheMetadataStore +from vibe_agent.data_ops import DataOpsManager +from vibe_agent.storage import asset_management +from vibe_agent.storage.storage import Storage +from vibe_common.schemas import CacheInfo, OpRunId +from vibe_core.data.core_types import OpIOType +from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson + + +class FakeOpRunResult: + def __init__(self, op_name: str, fake_asset_ids: Set[str]): + self.cache_info = CacheInfo(op_name, "1.0", {}, {}) + self.asset_ids = fake_asset_ids + + def get_output(self) -> OpIOType: + return {self.cache_info.name: {"assets": {asset_id: {} for asset_id in self.asset_ids}}} + + def get_op_run_id(self) -> OpRunId: + return OpRunId(self.cache_info.name, self.cache_info.hash) + + +@pytest.fixture +def no_asset_op_run(): + return FakeOpRunResult("no_asset_op", set()) + + +@pytest.fixture +def op_1_run(): + return FakeOpRunResult("op_1_run", {"asset-1", "asset-2"}) + + +@pytest.fixture +def op_2_run(): + return FakeOpRunResult("op_2_run", {"asset-2", "asset-3"}) + + +@pytest.fixture +def run_config() -> Dict[str, Any]: + run_config = asdict( + RunConfig( + name="fake", + workflow="fake", + parameters=None, + user_input=SpatioTemporalJson( + datetime.now(), + datetime.now(), + {}, + ), + id=uuid.uuid4(), + details=RunDetails(), + task_details={}, + spatio_temporal_json=None, + output="", + ) + ) + return run_config + + +class AsyncFakeRedis: + def __init__(self): + self.data = {} + + async def sadd(self, key: str, *values: str): + if key not in self.data: + self.data[key] = set() + self.data[key].update(values) + + async def srem(self, key: str, *values: str): + if key in self.data: + self.data[key].difference_update(values) + # Redis does not allow empty sets + if not 
self.data[key]: + del self.data[key] + + async def smembers(self, key: str): + return self.data.get(key, set()) + + async def scard(self, key: str): + return len(self.data.get(key, set())) + + async def sismember(self, key: str, value: str): + return value in self.data.get(key, set()) + + def pipeline(self, transaction: bool = True): + return AsyncFakeRedisPipeline(self) + + async def close(self): + pass + + +class AsyncFakeRedisPipeline: + def __init__(self, redis_client: AsyncFakeRedis): + self.redis_client = redis_client + self.commands = [] + + def __getattr__(self, name: str): + def method(*args: Any, **kwargs: Any): + command = (name, args, kwargs) + self.commands.append(command) + + return method + + async def execute(self): + coroutines = [] + for command in self.commands: + name, args, kwargs = command + method = getattr(self.redis_client, name) + coro = method(*args, **kwargs) + coroutines.append(coro) + results = await asyncio.gather(*coroutines) + return results + + +def get_mocked_data_ops() -> Tuple[DataOpsManager, AsyncFakeRedis, Mock]: + with patch("vibe_agent.cache_metadata_store.retrieve_dapr_secret"): + redis_client_mock = AsyncFakeRedis() + + storage_mock = Mock(spec=Storage) + storage_mock.asset_manager = Mock(spec=asset_management.AssetManager) + + metadata_store = RedisCacheMetadataStore() + metadata_store._get_redis_client = AsyncMock(return_value=redis_client_mock) + + do_manager = DataOpsManager(storage_mock, metadata_store=metadata_store) + do_manager._init_locks() + return do_manager, redis_client_mock, storage_mock + + +def assert_op_in_fake_redis(redis_client: AsyncFakeRedis, run_id: str, fake_op: FakeOpRunResult): + run_ops_key = RedisCacheMetadataStore._run_ops_key_format.format(run_id=run_id) + op_runs_key = RedisCacheMetadataStore._op_runs_key_format.format( + op_name=fake_op.cache_info.name, op_hash=fake_op.cache_info.hash + ) + op_ref = RedisCacheMetadataStore._op_ref_format.format( + op_name=fake_op.cache_info.name, 
op_hash=fake_op.cache_info.hash + ) + op_assets_key = RedisCacheMetadataStore._op_assets_key_format.format( + op_name=fake_op.cache_info.name, op_hash=fake_op.cache_info.hash + ) + assert redis_client.data[run_ops_key] == {op_ref} + assert run_id in redis_client.data[op_runs_key] + + if fake_op.asset_ids: + assert redis_client.data[op_assets_key] == fake_op.asset_ids + + for asset_id in fake_op.asset_ids: + asset_op_key = RedisCacheMetadataStore._asset_ops_key_format.format(asset_id=asset_id) + assert op_ref in redis_client.data[asset_op_key] + + +@pytest.mark.anyio +async def test_store_references_with_empty_asset_list(no_asset_op_run: FakeOpRunResult): + do_manager, redis_client_mock, _ = get_mocked_data_ops() + await do_manager.add_references( + "fake-run", no_asset_op_run.get_op_run_id(), no_asset_op_run.get_output() + ) + + assert_op_in_fake_redis(redis_client_mock, "fake-run", no_asset_op_run) + + +@pytest.mark.anyio +async def test_store_references_simple(op_1_run: FakeOpRunResult): + do_manager, redis_client_mock, _ = get_mocked_data_ops() + await do_manager.add_references("fake-run", op_1_run.get_op_run_id(), op_1_run.get_output()) + assert len(redis_client_mock.data) == 3 + len(op_1_run.asset_ids) + assert_op_in_fake_redis(redis_client_mock, "fake-run", op_1_run) + + +@pytest.mark.anyio +async def test_store_references_two_wfs_shared_op(op_1_run: FakeOpRunResult): + do_manager, redis_client_mock, _ = get_mocked_data_ops() + await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) + await do_manager.add_references("fake-run-2", op_1_run.get_op_run_id(), op_1_run.get_output()) + + assert len(redis_client_mock.data) == 4 + len(op_1_run.asset_ids) + + assert_op_in_fake_redis(redis_client_mock, "fake-run-1", op_1_run) + assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_1_run) + + +@pytest.mark.anyio +async def test_store_references_two_wfs_shared_asset( + op_1_run: FakeOpRunResult, + op_2_run: FakeOpRunResult, 
+): + do_manager, redis_client_mock, _ = get_mocked_data_ops() + await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) + await do_manager.add_references("fake-run-2", op_2_run.get_op_run_id(), op_2_run.get_output()) + + assert len(redis_client_mock.data) == 6 + len(op_1_run.asset_ids) + len(op_2_run.asset_ids) - 1 + + assert_op_in_fake_redis(redis_client_mock, "fake-run-1", op_1_run) + assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_2_run) + + +@patch("vibe_common.statestore.StateStore.retrieve") +@pytest.mark.anyio +async def test_delete_invalid_workflow_run(ss_retrieve_mock: Mock, run_config: Dict[str, Any]): + do_manager, _, _ = get_mocked_data_ops() + invalid_delete_statuses = [ + RunStatus.pending, + RunStatus.queued, + RunStatus.running, + RunStatus.deleting, + RunStatus.deleted, + ] + + for status in invalid_delete_statuses: + run_config["details"]["status"] = status + ss_retrieve_mock.return_value = run_config + result = await do_manager.delete_workflow_run("fake-run") + assert not result + + +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_delete_workflow_run_no_assets( + ss_store_mock: Mock, + ss_retrieve_mock: Mock, + no_asset_op_run: FakeOpRunResult, + run_config: Dict[str, Any], +): + do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() + await do_manager.add_references( + "fake-run", no_asset_op_run.get_op_run_id(), no_asset_op_run.get_output() + ) + + run_config["details"]["status"] = RunStatus.done + ss_retrieve_mock.return_value = run_config + await do_manager.delete_workflow_run("fake-run") + + assert ss_store_mock.call_count == 2 + rc1 = ss_store_mock.call_args_list[0][0][1] + assert rc1.details.status == RunStatus.deleting + rc2 = ss_store_mock.call_args_list[1][0][1] + assert rc2.details.status == RunStatus.deleted + + storage_mock.asset_manager.remove.assert_not_called() + 
storage_mock.remove.assert_called_once_with(no_asset_op_run.get_op_run_id()) + + assert len(redis_client_mock.data) == 0 + + +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_delete_workflow_run_simple( + ss_store_mock: Mock, + ss_retrieve_mock: Mock, + op_1_run: FakeOpRunResult, + run_config: Dict[str, Any], +): + do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() + await do_manager.add_references("fake-run", op_1_run.get_op_run_id(), op_1_run.get_output()) + + run_config["details"]["status"] = RunStatus.done + ss_retrieve_mock.return_value = run_config + await do_manager.delete_workflow_run("fake-run") + + assert ss_store_mock.call_count == 2 + rc1 = ss_store_mock.call_args_list[0][0][1] + assert rc1.details.status == RunStatus.deleting + rc2 = ss_store_mock.call_args_list[1][0][1] + assert rc2.details.status == RunStatus.deleted + + calls = [call(asset_id) for asset_id in op_1_run.asset_ids] + storage_mock.asset_manager.remove.assert_has_calls(calls, any_order=True) + storage_mock.remove.assert_called_once_with(op_1_run.get_op_run_id()) + + assert len(redis_client_mock.data) == 0 + + +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_delete_workflow_run_overlapping_op_and_asset( + ss_store_mock: Mock, + ss_retrieve_mock: Mock, + op_1_run: FakeOpRunResult, + op_2_run: FakeOpRunResult, + run_config: Dict[str, Any], +): + do_manager, redis_client_mock, storage_mock = get_mocked_data_ops() + await do_manager.add_references("fake-run-1", op_1_run.get_op_run_id(), op_1_run.get_output()) + await do_manager.add_references("fake-run-1", op_2_run.get_op_run_id(), op_2_run.get_output()) + await do_manager.add_references("fake-run-2", op_1_run.get_op_run_id(), op_1_run.get_output()) + + run_config["details"]["status"] = RunStatus.done + ss_retrieve_mock.return_value = run_config 
+ await do_manager.delete_workflow_run("fake-run-1") + + storage_mock.asset_manager.remove.assert_called_once_with("asset-3") + storage_mock.remove.assert_called_once_with(op_2_run.get_op_run_id()) + + assert_op_in_fake_redis(redis_client_mock, "fake-run-2", op_1_run) + assert len(redis_client_mock.data) == 3 + len(op_1_run.asset_ids) diff --git a/src/vibe_agent/tests/test_eywa_asset.py b/src/vibe_agent/tests/test_eywa_asset.py new file mode 100644 index 00000000..0759905b --- /dev/null +++ b/src/vibe_agent/tests/test_eywa_asset.py @@ -0,0 +1,38 @@ +import mimetypes +from pathlib import Path + +import pytest + +from vibe_agent.storage.asset_management import AssetManager +from vibe_core.data import AssetVibe + +CONTENT = "FAKE CONTENT FILE" +EXTENSION = ".txt" +ID = "FAKE_FILE" +FNAME = f"{ID}{EXTENSION}" + + +@pytest.fixture +def local_file(tmp_path: Path) -> str: + with open(tmp_path / FNAME, "w") as f: + f.write(CONTENT) + + assert Path.exists(tmp_path / FNAME) + return (tmp_path / FNAME).as_posix() + + +@pytest.fixture +def remote_file(local_file: str, blob_asset_manager: AssetManager) -> str: + blob_asset_manager.store(ID, local_file) + assert blob_asset_manager.exists(ID) + return blob_asset_manager.retrieve(ID) + + +def test_local_asset(local_file: str): + local_asset = AssetVibe(reference=local_file, type=mimetypes.types_map[EXTENSION], id=ID) + + # file is local, then local path must be equal to passed reference + assert local_asset.local_path == local_file + + # Local urls are assigned with file:// prefix + assert local_asset.url == f"file://{local_file}" diff --git a/src/vibe_agent/tests/test_local_asset_manager.py b/src/vibe_agent/tests/test_local_asset_manager.py new file mode 100644 index 00000000..3ec48dac --- /dev/null +++ b/src/vibe_agent/tests/test_local_asset_manager.py @@ -0,0 +1,110 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock, Mock, patch + +import pytest +import requests + +from 
vibe_agent.storage.asset_management import LocalFileAssetManager + + +@pytest.fixture +def manager(tmpdir: str): + return LocalFileAssetManager(tmpdir) + + +@patch("os.makedirs") +@patch("shutil.copyfile") +def test_store_add_file(shutil_mock: Mock, makedir_mock: Mock, manager: LocalFileAssetManager): + guid = "123456" + file_path = os.path.join("fake", "file", "path") + manager.exists = MagicMock(return_value=False) + + actual_return = manager.store(guid, file_path) + + target_folder = os.path.join(manager.root_path, guid) + target_file = os.path.join(target_folder, os.path.basename(file_path)) + makedir_mock.assert_called_once_with(target_folder) + shutil_mock.assert_called_once_with(file_path, target_file) + assert actual_return == target_file + + +@patch("os.makedirs") +@patch("shutil.copyfile") +def test_store_exists(shutil_mock: Mock, makedir_mock: Mock, manager: LocalFileAssetManager): + guid = "123456" + file_path = os.path.join("fake", "file", "path") + manager.exists = MagicMock(return_value=True) + return_value = "fake_return_path" + manager.retrieve = MagicMock(return_value=return_value) + + actual_return = manager.store(guid, file_path) + + makedir_mock.assert_not_called() + shutil_mock.assert_not_called() + assert actual_return == return_value + + +def test_remove(manager: LocalFileAssetManager): + guid = "123456" + manager.exists = MagicMock(return_value=True) + + with patch("shutil.rmtree") as shutil_mock: + manager.remove(guid) + shutil_mock.assert_called_once_with(os.path.join(manager.root_path, guid)) + + +@patch("shutil.rmtree") +def test_remove_not_exists(shutil_mock: Mock, manager: LocalFileAssetManager): + guid = "123456" + manager.exists = MagicMock(return_value=False) + + manager.remove(guid) + + shutil_mock.assert_not_called() + + +@patch("os.path.exists") +@patch("os.listdir") +def test_retrieve(listdir_mock: Mock, exists_mock: Mock): + with TemporaryDirectory() as tmp_dir: + guid = "123456" + file_name = os.path.join("fake_file") + 
manager = LocalFileAssetManager(tmp_dir) + manager.exists = MagicMock(return_value=False) + listdir_mock.return_value = [file_name] + exists_mock.return_value = True + + ret = manager.retrieve(guid) + + listdir_mock.assert_called_once_with(os.path.join(tmp_dir, guid)) + assert ret == os.path.join(tmp_dir, guid, file_name) + + +@patch("os.path.exists") +def test_exists(exists_mock: Mock): + with TemporaryDirectory() as tmp_dir: + guid = "123456" + manager = LocalFileAssetManager(tmp_dir) + manager.exists(guid) + exists_mock.assert_called_once_with(os.path.join(tmp_dir, guid)) + + +@pytest.mark.parametrize("local_file_ref", ["path", "uri"], indirect=True) +def test_store_local(manager: LocalFileAssetManager, local_file_ref: str): + asset_guid = "123456" + assert not manager.exists(asset_guid) + manager.store(asset_guid, local_file_ref) + assert manager.exists(asset_guid) + assert os.path.exists(manager.retrieve(asset_guid)) + + +@pytest.mark.parametrize("non_existing_file", ["local"], indirect=True) +def test_asset_does_not_exist_on_fail(manager: LocalFileAssetManager, non_existing_file: str): + asset_guid = "123456" + assert not manager.exists(asset_guid) + with pytest.raises((FileNotFoundError, requests.exceptions.HTTPError)): + manager.store(asset_guid, non_existing_file) + assert not manager.exists(asset_guid) + with pytest.raises(ValueError): + manager.retrieve(asset_guid) diff --git a/src/vibe_agent/tests/test_storage.py b/src/vibe_agent/tests/test_storage.py new file mode 100644 index 00000000..0574b67a --- /dev/null +++ b/src/vibe_agent/tests/test_storage.py @@ -0,0 +1,97 @@ +import os +from datetime import datetime, timezone +from typing import Any, Dict +from unittest.mock import MagicMock, patch + +import pytest +from azure.cosmos.exceptions import CosmosHttpResponseError +from pystac import Asset, Item +from shapely import geometry as shpg +from shapely.geometry import Polygon, mapping + +from vibe_agent.storage.remote_storage import CosmosStorage +from 
vibe_agent.storage.storage import AssetCopyHandler, ItemDict +from vibe_common.schemas import CacheInfo +from vibe_core.data import DataVibe +from vibe_core.data.utils import StacConverter +from vibe_dev.testing.storage_fixtures import * # type: ignore # noqa: F403, F401 + + +@pytest.fixture +def item_dict() -> ItemDict: + num_items = 5 + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + ] + polygon: Dict[str, Any] = mapping(Polygon(polygon_coords)) # type: ignore + timestamp = datetime.now(timezone.utc) + + def create_item(i: int): + id = str(i) + new_item = Item(id=id, geometry=polygon, datetime=timestamp, properties={}, bbox=None) + asset = Asset(href=os.path.join("/", "fake", id)) + new_item.add_asset(key=id, asset=asset) + + return new_item + + items = [create_item(i) for i in range(num_items)] + + single_item = create_item(num_items) + + output_dict: ItemDict = {"list_input": items, "single_input": single_item} + + return output_dict + + +@patch("vibe_agent.storage.asset_management.AssetManager") +def test_asset_handler_filename(mock_manager: MagicMock, item_dict: ItemDict): + expected_href = "changed!" 
+ mock_manager.store.return_value = expected_href + asset_handler = AssetCopyHandler(mock_manager) + new_items = asset_handler.copy_assets(item_dict) + + for items in new_items.values(): + if isinstance(items, list): + for i in items: + for a in i.get_assets().values(): + assert a.href == expected_href + else: + for a in items.get_assets().values(): + assert a.href == expected_href + + +@patch("vibe_agent.storage.CosmosStorage._store_data") +def test_cosmos_storage_split(mock_handle: MagicMock): + fake_exception = CosmosHttpResponseError(status_code=413) + mock_handle.side_effect = [fake_exception, fake_exception, None] + items = { + "test_data": [ + DataVibe( + id=f"{i}", + time_range=(datetime.utcnow(), datetime.utcnow()), + geometry=shpg.mapping(shpg.box(0, 0, 1, 1)), + assets=[], + ) + for i in range(10) + ] + } + converter = StacConverter() + # `DataVibe` inherits from `BaseVibe` so the below should work fine, but + # pyright/pylance don't like it. + test_items: ItemDict = {k: converter.to_stac_item(v) for k, v in items.items()} # type: ignore + storage = CosmosStorage( + key="", + asset_manager=None, # type: ignore + stac_container_name="", + cosmos_database_name="", + cosmos_url="", + ) + cache_info = CacheInfo("test_op", "1.0", {}, {}) + storage.store("test_run", test_items, cache_info) + assert mock_handle.call_count == 3 + assert len(mock_handle.call_args_list[0].args[2][0]["items"]) == 10 + assert len(mock_handle.call_args_list[1].args[2][0]["items"]) == 5 + assert len(mock_handle.call_args_list[2].args[2][0]["items"]) == 3 diff --git a/src/vibe_agent/tests/test_uri_handling.py b/src/vibe_agent/tests/test_uri_handling.py new file mode 100644 index 00000000..bbe97e5e --- /dev/null +++ b/src/vibe_agent/tests/test_uri_handling.py @@ -0,0 +1,40 @@ +import os +from pathlib import Path + +import pytest +from azure.storage.blob import ContainerClient + +from vibe_agent.storage.file_upload import upload_to_blob +from vibe_core.uri import is_local, 
def setup_debug(activate: bool = False, port: int = 5678):
    """
    Optionally start a debugpy listener for this process.

    :param activate: When false (the default) nothing happens

    :param port: Port passed to ``debugpy.listen`` when ``activate`` is true
    """
    if activate:
        debugpy.listen(port)
        logging.info(f"Debugger enabled and listening on port {port}")
def get_cache_info(
    dependency_resolver: OperationDependencyResolver,
    input_items: OpIOType,
    op_config: OperationSpec,
    traceparent: str,
) -> CacheInfo:
    """
    Build the ``CacheInfo`` that identifies one op execution in the cache.

    :param dependency_resolver: Resolver used to compute the op's dependencies

    :param input_items: Serialized op input, deserialized here before hashing

    :param op_config: Spec of the op; its name and version go into the cache info

    :param traceparent: Trace parent used to restore the telemetry context

    :return: Cache info built from op name, op version, deserialized input and dependencies
    """
    # We need traceparent here as abstract event loop mess up the opentelemetry context
    update_telemetry_context(traceparent)

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("get_cache_info"):
        resolved_dependencies = dependency_resolver.resolve(op_config)
        stac_input = OpIOConverter.deserialize_input(input_items)
        return CacheInfo(
            op_config.name, op_config.version, stac_input, resolved_dependencies
        )
def retrieve_possible_output(
    self, cache_info: CacheInfo, exec: Executor, traceparent: str
) -> Optional[OpIOType]:
    """
    Look up a previously stored output for the given cache info.

    :param cache_info: Cache info identifying the op execution

    :param exec: Not used by this method

    :param traceparent: Trace parent used to restore the telemetry context

    :return: The serialized cached output on a hit, ``None`` on a miss. Any falsy value
        returned by storage is treated as a miss.
    """
    cached_output = self.storage.retrieve_output_from_input_if_exists(cache_info)
    # We need traceparent here as abstract event loop mess up the opentelemetry context
    update_telemetry_context(traceparent)

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("retrieve_possible_output"):
        if not cached_output:
            logging.info(f"Cache miss with hash {cache_info.hash} in op {cache_info.name}")
            return None
        logging.info(f"Cache hit with hash {cache_info.hash} in op {cache_info.name}")
        return OpIOConverter.serialize_output(cached_output)
+ try: + try: + cache_info = get_cache_info( + self.dependency_resolver, + content.input, + op_config, + get_current_trace_parent(), + ) + except RecursionError as e: + logging.error(f"{recursion_msg} {e}") + os._exit(1) + except Exception as e: + raise RuntimeError( + f"Failed to get cache info for op {op_config.name} with exception " + f"{type(e)}:{e}" + ) from e + possible_output = self.retrieve_possible_output( + cache_info, self.executor, get_current_trace_parent() + ) + + async def async_closure(): + if possible_output is not None: + await self.metadata_store.add_refs( + str(message.run_id), + OpRunId(name=cache_info.name, hash=cache_info.hash), + possible_output, + ) + logging.info(f"Cache hit for op {op_config.name}") + await self.messenger.send_ack_reply(message) + await self.messenger.send_success_reply( + message, possible_output, cache_info + ) + else: + self.run_new_op( + WorkMessageBuilder.add_cache_info_to_execute_request( + cast(ExecuteRequestMessage, message), cache_info + ) + ) + + asyncio.run(async_closure()) + except RecursionError as e: + logging.error(f"{recursion_msg} {e}") + os._exit(1) + + logging.debug(f"Removing message for run_id {message.header.run_id} from queue") + return TopicEventResponse("success") + + @add_trace + def failure_callback(event: v1.Event, e: Exception, tb: List[str]) -> TopicEventResponse: + message = event_to_work_message(event) + content = cast(ExecuteRequestContent, message.content) + op_config = cast(OperationSpec, content.operation_spec) + log_text = f"Failure callback for op {op_config.name}, Exception {e}, Traceback {tb}" + logging.info(log_text) + # Send failure reply to orchestrator so we don't get our workflow stuck + asyncio.run(self.messenger.send_failure_reply(event.id, e, tb)) + return TopicEventResponse("drop") + + update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) + + with trace.get_tracer(__name__).start_as_current_span("fetch_work"): + return 
class CacheMetadataStoreProtocol(Protocol):
    """
    Protocol for a cache metadata store. This store is used to store and retrieve metadata about
    the relationships of the data (i.e. workflow runs, operation runs and assets) in the cache.
    """

    async def store_references(self, run_id: str, op_run_id: OpRunId, assets: Set[str]) -> None:
        """
        Record the references created by an op run: link the workflow run and the op run to
        each other and link the op run with every asset id in ``assets``.

        :param run_id: The workflow run id

        :param op_run_id: The op run reference

        :param assets: Ids of the assets referenced by the op run
        """
        ...

    async def get_run_ops(self, run_id: str) -> Set[OpRunId]:
        """
        :param run_id: The workflow run id

        :return: The set of op runs associated with the workflow run
        """
        ...

    async def get_op_workflow_runs(self, op_ref: OpRunId) -> Set[str]:
        """
        :param op_ref: The op run reference

        :return: The set of workflow run ids associated with the op run
        """
        ...

    async def get_op_assets(self, op_ref: OpRunId) -> Set[str]:
        """
        :param op_ref: The op run reference

        :return: The set of asset ids associated with the op run
        """
        ...

    async def get_assets_refs(self, asset_ids: Set[str]) -> Dict[str, Set[OpRunId]]:
        """
        :param asset_ids: The asset ids to look up

        :return: A dictionary mapping asset ids to the set of op run references associated with
            each asset
        """
        ...

    # Declared as returning None: the Redis implementation in this package returns nothing and
    # the data ops manager does not use a result from this call.
    async def remove_workflow_op_refs(
        self, workflow_run_id: str, op_run_ref: OpRunId
    ) -> None:
        """
        Remove the references between a workflow run and an op run.

        :param workflow_run_id: The workflow run id

        :param op_run_ref: The op run reference
        """
        ...

    async def remove_op_asset_refs(self, op_run_ref: OpRunId, asset_ids: Set[str]) -> None:
        """
        Remove the references between an op run and each of the given assets.

        :param op_run_ref: The op run reference

        :param asset_ids: Ids of the assets whose references are removed
        """
        ...
async def store_references(self, run_id: str, op_run_id: OpRunId, assets: Set[str]) -> None:
    """
    Store, in a single Redis transaction, the references created by an op run.

    The op run reference is added to the workflow run's op set and the workflow run id to the
    op run's run set. When ``assets`` is non-empty, the asset ids are added to the op run's
    asset set and the op run reference to each asset's op set.

    :param run_id: The workflow run id

    :param op_run_id: The op run reference

    :param assets: Ids of the assets referenced by the op run (may be empty)
    """
    # TODO: is a new client needed for every operation or can we initiate in init and reuse?
    client = await self._get_redis_client()

    try:
        transaction = client.pipeline(transaction=True)
        op_fields = {"op_name": op_run_id.name, "op_hash": op_run_id.hash}
        op_ref = self._op_run_id_to_op_ref_str(op_run_id)

        # workflow run -> op run, then op run -> workflow run
        transaction.sadd(self._run_ops_key_format.format(run_id=run_id), op_ref)
        transaction.sadd(self._op_runs_key_format.format(**op_fields), run_id)

        if assets:
            # op run -> assets, then every asset -> op run
            transaction.sadd(self._op_assets_key_format.format(**op_fields), *assets)
            for asset in assets:
                transaction.sadd(self._asset_ops_key_format.format(asset_id=asset), op_ref)

        await transaction.execute()
        self.logger.debug(
            f"Transaction complete for storing references for run id {run_id} "
            f"(op name {op_run_id.name}, op hash {op_run_id.hash})."
        )
    finally:
        # The client is created per call, so it is always closed here
        await client.close()
async def get_assets_refs(self, asset_ids: Set[str]) -> Dict[str, Set[OpRunId]]:
    """
    Given a set of asset ids, return the set of op run references associated with each asset.

    :param asset_ids: The asset ids to look up

    :return: A dictionary mapping asset ids to the set of op run references associated with
        each asset. An asset without references maps to an empty set.
    """
    redis_client = await self._get_redis_client()

    try:
        # The lookups are queued on a non-transactional pipeline and executed together
        pipe = redis_client.pipeline(transaction=False)
        # Fix the iteration order so each reply can be paired with its asset id
        asset_ids_list = list(asset_ids)

        for asset_id in asset_ids_list:
            pipe.smembers(self._asset_ops_key_format.format(asset_id=asset_id))

        assets_smembers_result = await pipe.execute()

        # Build sets (not lists) so the result matches the declared return type
        return {
            asset_id: {self._str_to_op_run_id(o) for o in asset_smembers}
            for asset_id, asset_smembers in zip(asset_ids_list, assets_smembers_result)
        }
    finally:
        await redis_client.close()
class CacheMetadataStoreClient:
    """
    Client used to register cache references by posting to the ``add_refs`` URL built from
    ``DATA_OPS_INVOKE_URL_TEMPLATE`` through a ``VibeDaprClient``.
    """

    def __init__(self):
        logger_name = f"{__name__}.{self.__class__.__name__}"
        self.logger = logging.getLogger(logger_name)
        self.logger.debug("Initializing CacheMetadataStoreClient")
        self.vibe_dapr_client = VibeDaprClient()

    async def add_refs(
        self,
        run_id: str,
        op_run_id: OpRunId,
        output: OpIOType,
    ) -> None:
        """
        Post the references of an op run output for a workflow run.

        :param run_id: The workflow run id, used to build the request URL

        :param op_run_id: The op run reference (name and hash)

        :param output: The op output sent along with the op run reference
        """
        self.logger.debug(
            f"Adding refs for run {run_id} with op name = {op_run_id.name} "
            f"op hash = {op_run_id.hash}"
        )

        # Under load, Pydantic is having issues serializing the OpRunId dataclass object
        op_run_id_dict = OpRunIdDict(name=op_run_id.name, hash=op_run_id.hash)

        dapr = self.vibe_dapr_client
        target_url = DATA_OPS_INVOKE_URL_TEMPLATE.format("add_refs", run_id)
        payload = {
            "op_run_id_dict": dapr.obj_json(op_run_id_dict),
            "output": dapr.obj_json(output),
        }
        response = await dapr.post(
            url=target_url,
            data=payload,
            traceparent=get_current_trace_parent(),
        )

        # Sanity check only: the message implies the underlying client reports failures itself
        assert response.ok, "Failed to add refs, but underlying method didn't capture it"
DataOpsManager: + """ + The DataOpsManager is responsible for managing metadata about the system's cached data and + coordinating data operations. + + Assumptions this code makes: + - Once a workflow run is complete, its metadata (i.e. `RunConfig` in StateStore) and cached + data in Storage is immutable outside of the DataOpsManager. + - Once a op run is complete its cached data (i.e. metadata/catalog) and assets in Storage + are immutable. + + Notes about locks: + - The way metadata_store_lock essentially serializes all requests to the metadata store + whether they be add ref or delete ref requests. To make this more efficient in the future, + we can create a lock that allows many add ref requests to go through at a time but only one + delete ref request to execute at a time. + """ + + app: App + metadata_store_lock: RWLock + otel_service_name: str + statestore_lock: asyncio.Lock + + user_deletion_reason = "Deletion requested by user" + + def __init__( + self, + storage: Storage, + metadata_store: CacheMetadataStoreProtocol, + pubsubname: str = CONTROL_STATUS_PUBSUB, + status_topic: str = STATUS_PUBSUB_TOPIC, + delete_workflow_topic: str = WORKFLOW_REQUEST_PUBSUB_TOPIC, + port: int = settings.HTTP_APP_PORT, + logdir: Optional[str] = None, + max_log_file_bytes: int = MAX_LOG_FILE_BYTES, + log_backup_count: int = LOG_BACKUP_COUNT, + loglevel: Optional[str] = None, + otel_service_name: str = "", + ): + self.app = App() + self.port = port + self.pubsubname = pubsubname + self.status_topic = status_topic + self.delete_workflow_topic = delete_workflow_topic + self.storage = storage + self.metadata_store = metadata_store + self.statestore = StateStore() + self.logdir = logdir + self.max_log_file_bytes = max_log_file_bytes + self.log_backup_count = log_backup_count + self.loglevel = loglevel + self.otel_service_name = otel_service_name + + self._setup_routes() + + def _init_locks(self): + logging.debug("Creating locks") + self.metadata_store_lock = RWLock(fast=True) + 
self.statestore_lock = asyncio.Lock() + + def _setup_routes(self): + @self.app.startup() + def startup(): + # locks have to be be created on the app's (uvicorn's) event loop + self._init_locks() + + @self.app.subscribe_async(self.pubsubname, self.status_topic) + async def fetch_work(event: v1.Event) -> TopicEventResponse: + return await self.fetch_work(self.status_topic, event) + + @self.app.subscribe_async(self.pubsubname, self.delete_workflow_topic) + async def manage_workflow(event: v1.Event): + await self.handle_workflow_event(self.delete_workflow_topic, event) + + @self.app.method(name="add_refs/{run_id}") + async def add_refs( + request: Request, run_id: str, op_run_id_dict: OpRunIdDict, output: OpIOType + ) -> TopicEventResponse: + try: + traceparent = request.headers.get(TRACEPARENT_HEADER_KEY) + if traceparent: + update_telemetry_context(traceparent) + else: + logging.warning("No traceparent found in request headers.") + + with trace.get_tracer(__name__).start_as_current_span("add_refs"): + await self.add_references(run_id, OpRunId(**op_run_id_dict), output) + return TopicEventResponseStatus.success + except Exception as e: + logging.error( + f"Error adding references from service invocation for run id {run_id}: {e}" + ) + return TopicEventResponseStatus.drop + + async def fetch_work(self, channel: str, event: v1.Event) -> TopicEventResponse: + @add_trace + async def success_callback(message: WorkMessage) -> TopicEventResponse: + if not message.is_valid_for_channel(channel): + logging.warning( + f"Received invalid message {message} for channel {channel}. Dropping it." + ) + return TopicEventResponseStatus.drop + + if message.header.type == MessageType.execute_reply: + content = cast(ExecuteReplyContent, message.content) + logging.debug( + f"Received execute reply for run id {message.run_id} " + f"(op name {content.cache_info.name}, op hash {content.cache_info.hash})." 
+ ) + + run_id = str(message.run_id) + op_run_id = OpRunId(content.cache_info.name, content.cache_info.hash) + await self.add_references(run_id, op_run_id, content.output) + + return TopicEventResponseStatus.success + + @add_trace + async def failure_callback( + event: v1.Event, e: Exception, traceback: List[str] + ) -> TopicEventResponse: + run_id = str(run_id_from_traceparent(event.id)) + logging.error(f"Failed to add references for run id {run_id}: {e}") + return TopicEventResponseStatus.drop + + update_telemetry_context(extract_message_header_from_event(event).current_trace_parent) + with trace.get_tracer(__name__).start_as_current_span("fetch_work"): + return await accept_or_fail_event_async(event, success_callback, failure_callback) + + async def handle_workflow_event(self, channel: str, event: v1.Event): + async def success_callback(message: WorkMessage) -> TopicEventResponse: + if not message.is_valid_for_channel(channel): + logging.warning( + f"Received invalid message {message} for channel {channel}. Dropping it." + ) + return TopicEventResponseStatus.drop + + if message.header.type == MessageType.workflow_deletion_request: + logging.debug(f"Received deletion request for run id {message.run_id}.") + + run_id = str(message.run_id) + await self.delete_workflow_run(run_id) + + return TopicEventResponseStatus.success + + async def failure_callback( + event: v1.Event, e: Exception, traceback: List[str] + ) -> TopicEventResponse: + run_id = str(run_id_from_traceparent(event.id)) + logging.error(f"Failed to delete run id {run_id}: {e}") + return TopicEventResponseStatus.drop + + return await accept_or_fail_event_async(event, success_callback, failure_callback) + + def get_asset_ids(self, output: OpIOType) -> Set[str]: + """ + Given op output as a OpIOTypes, returns the set of asset ids that are referenced in the + output. 
async def add_references(self, run_id: str, op_run_id: OpRunId, output: OpIOType) -> None:
    """
    Store the references between a workflow run, an op run and the assets in the op output.

    Failures are logged with their traceback and re-raised to the caller.

    :param run_id: The workflow run id

    :param op_run_id: The op run reference

    :param output: The op output whose asset ids are extracted with ``get_asset_ids``
    """
    # many requests to add references can be processed simultaneously assuming Redis SADD used
    shared_lock = self.metadata_store_lock.reader_lock
    async with shared_lock:
        try:
            await self.metadata_store.store_references(
                run_id, op_run_id, self.get_asset_ids(output)
            )
            logging.info(
                f"Successfully added references for run id {run_id} "
                f"(op name {op_run_id.name}, op hash {op_run_id.hash})."
            )
        except Exception:
            logging.exception(
                f"Failed to add references for run id {run_id} "
                f"(op name {op_run_id.name}, op hash {op_run_id.hash})."
            )
            raise
+ # The assumption is once the workflow is finished, the RunConfig will not change in the + # statestore (i.e. the status will not change) outside of the Data Ops Manager so it is + # sufficient to use asyncio lock in the Data Ops manager. + run_data = await self.statestore.retrieve(str(run_id)) + run_config = RunConfig(**run_data) + + if not self._can_delete(run_config): + return False + + run_config.details.status = RunStatus.deleting + run_config.details.reason = self.user_deletion_reason + await self.statestore.store(run_id, run_config) + return True + + async def _finalize_delete(self, run_id: str) -> None: + async with self.statestore_lock: # type: ignore + run_data = await self.statestore.retrieve(str(run_id)) + run_config = RunConfig(**run_data) + run_config.details.status = RunStatus.deleted + run_config.set_output({}) + await self.statestore.store(run_id, run_config) + + async def delete_op_run(self, op_run: OpRunId) -> None: + # TODO: the following two calls may be able to be combined into one call to a Lua script + # (need to learn more about Lua scripts) + op_asset_ids = await self.metadata_store.get_op_assets(op_run) + assets_to_ops = await self.metadata_store.get_assets_refs(op_asset_ids) + + for asset_id in op_asset_ids: + asset_ops = assets_to_ops[asset_id] + + if op_run not in asset_ops: + logging.warning( + f"Inconsistent state in metadata store: asset {asset_id} does not contain " + f"reference to {op_run}." + ) + continue + + if len(asset_ops) == 1: + # TODO: aiofiles or ?? + logging.debug(f"Removing asset {asset_id} from storage.") + self.storage.asset_manager.remove(asset_id) + + # TODO: aiofiles or ?? 
async def delete_workflow_run(self, run_id: str) -> bool:
    """
    Delete a workflow run's cached data and its references in the metadata store.

    For every op run referenced by the workflow run, the op run is deleted only when this
    workflow run is its sole referrer. The workflow/op references are removed in all cases.

    :param run_id: The workflow run id

    :return: ``False`` when ``_init_delete`` refuses the deletion, ``True`` otherwise
    """
    deletion_started = await self._init_delete(run_id)
    if not deletion_started:
        return False

    for op_run in await self.metadata_store.get_run_ops(run_id):
        # (re)grabbing write lock for each op so as not to starve other requests due to delete
        async with self.metadata_store_lock.writer_lock:
            referencing_runs = await self.metadata_store.get_op_workflow_runs(op_run)

            if run_id in referencing_runs:
                # Only the last referencing workflow run takes the op run with it
                if len(referencing_runs) == 1:
                    await self.delete_op_run(op_run)
            else:
                logging.warning(
                    f"Inconsistent state in metadata store: op {op_run} does not contain "
                    f"reference to workflow run {run_id}."
                )

            await self.metadata_store.remove_workflow_op_refs(run_id, op_run)

    await self._finalize_delete(run_id)
    return True
00000000..73623dee --- /dev/null +++ b/src/vibe_agent/vibe_agent/launch_cache.py @@ -0,0 +1,34 @@ +import asyncio +from typing import Any + +import hydra +from hydra.core.config_store import ConfigStore +from hydra_zen import instantiate, make_config + +from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage +from vibe_agent.cache import CacheConfig + +local_cache = CacheConfig(storage=local_storage, running_on_azure=False) +aks_cache = CacheConfig(storage=aks_cosmos_config, running_on_azure=True) + +LocalCacheConfig = make_config(impl=local_cache) +AksCacheConfig = make_config(impl=aks_cache) + +CacheLaunchConfig = make_config( + "cache", + debug=DebugConfig(), + hydra_defaults=["_self_", {"cache": "local"}], +) + + +# Register cache config with hydra's config store +cs = ConfigStore.instance() +cs.store(group="cache", name="local", node=LocalCacheConfig()) +cs.store(group="cache", name="aks", node=AksCacheConfig()) +cs.store(name="vibe_cache", node=CacheLaunchConfig) + + +@hydra.main(config_path=None, version_base=None, config_name="vibe_cache") +def main(cfg: Any): + cache_obj = instantiate(cfg) + asyncio.run(cache_obj.cache.impl.run()) diff --git a/src/vibe_agent/vibe_agent/launch_data_ops.py b/src/vibe_agent/vibe_agent/launch_data_ops.py new file mode 100644 index 00000000..4083e2a8 --- /dev/null +++ b/src/vibe_agent/vibe_agent/launch_data_ops.py @@ -0,0 +1,54 @@ +import asyncio +from typing import Any + +import hydra +from hydra.core.config_store import ConfigStore +from hydra_zen import instantiate, make_config + +from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage +from vibe_agent.cache_metadata_store import RedisCacheMetadataStoreConfig +from vibe_agent.data_ops import DataOpsConfig + +# Create instantiable configs for CacheMetadataStoreProtocol +redis_cache_metadata_store_config = RedisCacheMetadataStoreConfig() + +# create two DataOpsConfigs: one to build DataOpsManager with local storage and
another +# to build DataOpsManager with AKS/Cosmos storage +local_data_ops_config = DataOpsConfig( + metadata_store=redis_cache_metadata_store_config, storage=local_storage +) +aks_data_ops_config = DataOpsConfig( + metadata_store=redis_cache_metadata_store_config, storage=aks_cosmos_config +) + +# two configs each with one field, impl, one set to the DataOpsConfig for local storage, the +# other for AKS/Cosmos +LocalDataOpsConfig = make_config(impl=local_data_ops_config) +AksDataOpsConfig = make_config(impl=aks_data_ops_config) + +# launching the data ops service has two parts that need to be configured: +# 1. whether or not we are debugging the service +# 2. should the DataOpsManager be referencing local storage or an AKS/Cosmos storage +# - by default, it will use the "local" entry in the "data_ops" group in the ConfigStore as the +# default config for the data_ops field +DataOpsLaunchConfig = make_config( + "data_ops", + debug=DebugConfig(), + hydra_defaults=["_self_", {"data_ops": "local"}], +) + +# Register configs with hydra's config store +cs = ConfigStore.instance() +cs.store(group="data_ops", name="local", node=LocalDataOpsConfig) +cs.store(group="data_ops", name="aks", node=AksDataOpsConfig) +cs.store(name="vibe_data_ops", node=DataOpsLaunchConfig) + + +# The @hydra.main decorator in Hydra resolves all missing configurations from the top-level +# configuration using entries in the config store. If a configuration value is missing, Hydra +# will search the config store for a matching key and use the value stored in the config store +# if one is found.
+@hydra.main(config_path=None, version_base=None, config_name="vibe_data_ops") +def main(cfg: Any): + data_ops_launch_config_obj = instantiate(cfg) + asyncio.run(data_ops_launch_config_obj.data_ops.impl.run()) diff --git a/src/vibe_agent/vibe_agent/launch_worker.py b/src/vibe_agent/vibe_agent/launch_worker.py new file mode 100644 index 00000000..fa9b7d19 --- /dev/null +++ b/src/vibe_agent/vibe_agent/launch_worker.py @@ -0,0 +1,43 @@ +import asyncio +import signal +from multiprocessing import set_start_method +from typing import Any + +import hydra +from hydra.core.config_store import ConfigStore +from hydra_zen import instantiate, make_config + +from vibe_agent.agent_config import DebugConfig, aks_cosmos_config, local_storage +from vibe_agent.ops import OperationFactoryConfig +from vibe_common.secret_provider import DaprSecretProviderConfig + +from .worker import WorkerConfig + +local_worker = WorkerConfig( + factory_spec=OperationFactoryConfig(local_storage, DaprSecretProviderConfig()), +) +aks_worker = WorkerConfig( + factory_spec=OperationFactoryConfig(aks_cosmos_config, DaprSecretProviderConfig()), +) + +LocalWorkerConfig = make_config(impl=local_worker) +AksWorkerConfig = make_config(impl=aks_worker) + +WorkerLaunchConfig = make_config( + "worker", + debug=DebugConfig(), + hydra_defaults=["_self_", {"worker": "local"}], +) + +cs = ConfigStore.instance() +cs.store(group="worker", name="local", node=LocalWorkerConfig()) +cs.store(group="worker", name="aks", node=AksWorkerConfig()) +cs.store(name="vibe_worker", node=WorkerLaunchConfig) + + +@hydra.main(config_path=None, version_base=None, config_name="vibe_worker") +def main(cfg: Any): + set_start_method("forkserver") + worker_obj = instantiate(cfg) + signal.signal(signal.SIGTERM, worker_obj.worker.impl.pre_stop_hook) + asyncio.run(worker_obj.worker.impl.run()) diff --git a/src/vibe_agent/vibe_agent/ops.py b/src/vibe_agent/vibe_agent/ops.py new file mode 100644 index 00000000..ae652dcd --- /dev/null +++ 
b/src/vibe_agent/vibe_agent/ops.py @@ -0,0 +1,237 @@ +import importlib.util +import inspect +import logging +import os +from importlib.abc import Loader +from typing import Any, Callable, Dict, List, Optional, Union + +from azure.cosmos.exceptions import CosmosResourceExistsError +from hydra_zen import builds + +from vibe_agent.ops_helper import OpIOConverter +from vibe_agent.storage.local_storage import LocalResourceExistsError +from vibe_common.schemas import ( + CacheInfo, + EntryPointDict, + ItemDict, + OperationParser, + OperationSpec, + OpResolvedDependencies, +) +from vibe_common.secret_provider import SecretProvider, SecretProviderConfig +from vibe_core import data +from vibe_core.data.core_types import BaseVibeDict, InnerIOType, OpIOType, TypeDictVibe + +from .storage import Storage, StorageConfig + + +class Operation: + name: str + callback: Callable[..., BaseVibeDict] + storage: Storage + converter: data.StacConverter + inputs_spec: TypeDictVibe + output_spec: TypeDictVibe + version: str + + def __init__( + self, + name: str, + callback: Callable[..., BaseVibeDict], + storage: Storage, + converter: data.StacConverter, + inputs_spec: TypeDictVibe, + output_spec: TypeDictVibe, + version: str, + ): + self.name = name + self.callback = callback + self.storage = storage + self.converter = converter + self.inputs_spec = inputs_spec + self.output_spec = output_spec + self.version = version + self.logger = logging.getLogger(self.__class__.__name__) + + intersection = set(inputs_spec.keys()).intersection(output_spec.keys()) + if intersection: + raise ValueError( + f"Operation {name} has input and output with conflicting names {intersection}" + ) + + def _fetch_from_cache(self, cache_info: CacheInfo) -> Optional[OpIOType]: + """ + Try to fetch output from the cache, returns `None` if no output is found + """ + items = self.storage.retrieve_output_from_input_if_exists(cache_info) + if items is not None: + items = OpIOConverter.serialize_output(items) + return items 
+ + def _call_validate_op(self, **kwargs: InnerIOType) -> ItemDict: + results = self.callback(**kwargs) + result_keys = set(results) + output_keys = set(self.output_spec) + if result_keys != output_keys: + raise RuntimeError( + f"Invalid output obtained during execution of op '{self.name}'. " + f"Expected output keys {output_keys}, but callback returned {result_keys}" + ) + try: + return {k: self.converter.to_stac_item(v) for k, v in results.items()} + except AttributeError: + raise ValueError( + f"Expected a dict-like as return value of operation {self.name}, found " + f"{type(results)}" + ) + + # Run will run the operation, loading the data from the catalog + def run(self, input_items: OpIOType, cache_info: CacheInfo) -> OpIOType: + stac_items = OpIOConverter.deserialize_input(input_items) + op_hash = cache_info.hash + items_out = self._fetch_from_cache(cache_info) + if items_out is not None: + self.logger.warning( + f"Cache hit for op {self.name} with cache hash {op_hash} before computation, " + "probably due to a repeated message." 
+ ) + return items_out + + self.logger.info(f"Running op {self.name} for cache hash {op_hash}") + run_id = data.gen_guid() + retrieved_items = self.storage.retrieve(stac_items) + self.logger.info(f"Retrieved input for op {self.name}") + items = {k: self.converter.from_stac_item(v) for k, v in retrieved_items.items()} + self.logger.info(f"Running callback for op {self.name}") + stac_results = self._call_validate_op(**items) + self.logger.info(f"Callback finished for op {self.name}") + + try: + items_out = self.storage.store(run_id, stac_results, cache_info) + self.logger.info(f"Output stored for op {self.name}") + except (LocalResourceExistsError, CosmosResourceExistsError): + # If two instances of the same op with the same input start running at the same time + # We'll have a race condition where they'll both run, and try to store into the cache + # This will instead retrieve the output from the op that wrote their results first + items_out = self._fetch_from_cache(cache_info) + if items_out is not None: + self.logger.warning( + f"Cache hit after computing op {self.name} with cache hash {op_hash}, " + "probably due to a race condition." 
+ ) + return items_out + raise # We couldn't write and we can't read, so we break + + return OpIOConverter.serialize_output(items_out) + + +class CallableBuilder: + def __init__(self): + self.logger = logging.getLogger(self.__class__.__name__) + + def _resolve_callable( + self, op_root_folder: str, filename: str, callback_builder_name: str + ) -> Any: + modname = os.path.splitext(filename)[0] + path = os.path.join(op_root_folder, filename) + self.logger.debug( + f"Loading module spec for {modname} from path {path} " + f"with callback {callback_builder_name}" + ) + spec = importlib.util.spec_from_file_location(modname, path) + assert spec is not None + self.logger.debug(f"Loading module {modname} from spec") + module = importlib.util.module_from_spec(spec) + assert isinstance(spec.loader, Loader) + self.logger.debug(f"Executing module {modname}") + spec.loader.exec_module(module) + self.logger.debug(f"Getting callback {callback_builder_name} from module {modname}") + callback_builder = getattr(module, callback_builder_name) + + return callback_builder + + def build( + self, + op_root_folder: str, + entrypoint: EntryPointDict, + parameters: Dict[str, Any], + ) -> Callable[[Any], Any]: + self.logger.debug(f"Building callable builder for {entrypoint}") + callable_builder = self._resolve_callable( + op_root_folder, + entrypoint["file"], + entrypoint["callback_builder"], + ) + self.logger.debug(f"Building callable from {callable_builder}") + callable = callable_builder(**parameters) + if inspect.isclass(callable_builder): + callable = callable() + self.logger.debug(f"Built callable {callable}") + return callable + + +class OperationDependencyResolver: + def __init__(self): + self._resolver_map = {"parameters": self._resolve_params} + + def resolve(self, op_spec: OperationSpec) -> OpResolvedDependencies: + output: OpResolvedDependencies = {} + for item, dependencies_list in op_spec.dependencies.items(): + try: + output[item] = self._resolver_map[item](op_spec, 
dependencies_list) + except Exception as e: + raise ValueError( + f"Dependency {item}: {dependencies_list} could not be resolved" + ) from e + return output + + def _resolve_params(self, op_spec: OperationSpec, params_to_resolve: List[str]): + return {param_name: op_spec.parameters[param_name] for param_name in params_to_resolve} + + +class OperationFactory: + converter: data.StacConverter + storage: Storage + secret_provider: SecretProvider + callable_builder: CallableBuilder + dependency_resolver: OperationDependencyResolver + + def __init__(self, storage: Storage, secret_provider: SecretProvider): + self.storage = storage + self.converter = data.StacConverter() + self.callable_builder = CallableBuilder() + self.secret_provider = secret_provider + + self.dependency_resolver = OperationDependencyResolver() + + def build(self, op_definition: Union[str, OperationSpec]) -> Operation: + if isinstance(op_definition, str): + return self._build_impl(OperationParser.parse(op_definition)) + return self._build_impl(op_definition) + + def resolve_secrets(self, parameters: Dict[str, Any]) -> Dict[str, Any]: + return {k: self.secret_provider.resolve(v) for k, v in parameters.items()} + + def _build_impl(self, op_config: OperationSpec) -> Operation: + parameters = self.resolve_secrets(op_config.parameters) + callable = self.callable_builder.build( + op_config.root_folder, op_config.entrypoint, parameters + ) + + return Operation( + op_config.name, + callable, + self.storage, + self.converter, + op_config.inputs_spec, + op_config.output_spec, + op_config.version, + ) + + +OperationFactoryConfig = builds( + OperationFactory, + storage=StorageConfig, + secret_provider=SecretProviderConfig, + zen_dataclass={"module": "vibe_agent.ops", "cls_name": "OperationFactoryConfig"}, +) diff --git a/src/vibe_agent/vibe_agent/ops_helper.py b/src/vibe_agent/vibe_agent/ops_helper.py new file mode 100644 index 00000000..753a9ec4 --- /dev/null +++ b/src/vibe_agent/vibe_agent/ops_helper.py @@ -0,0 
+1,14 @@ +from vibe_core.data.core_types import OpIOType +from vibe_core.data.utils import deserialize_stac, serialize_stac + +from .storage import ItemDict + + +class OpIOConverter: + @staticmethod + def serialize_output(output: ItemDict) -> OpIOType: + return {k: serialize_stac(v) for k, v in output.items()} + + @staticmethod + def deserialize_input(input_items: OpIOType) -> ItemDict: + return {k: deserialize_stac(v) for k, v in input_items.items()} diff --git a/src/vibe_agent/vibe_agent/storage/__init__.py b/src/vibe_agent/vibe_agent/storage/__init__.py new file mode 100644 index 00000000..90e4562f --- /dev/null +++ b/src/vibe_agent/vibe_agent/storage/__init__.py @@ -0,0 +1,4 @@ +from .asset_management import BlobAssetManagerConfig, LocalFileAssetManagerConfig +from .local_storage import LocalStorage, LocalStorageConfig +from .remote_storage import CosmosStorage, CosmosStorageConfig +from .storage import ItemDict, Storage, StorageConfig diff --git a/src/vibe_agent/vibe_agent/storage/asset_management.py b/src/vibe_agent/vibe_agent/storage/asset_management.py new file mode 100644 index 00000000..c118f591 --- /dev/null +++ b/src/vibe_agent/vibe_agent/storage/asset_management.py @@ -0,0 +1,294 @@ +import logging +import os +import shutil +from abc import ABC, abstractmethod +from dataclasses import dataclass +from functools import lru_cache +from typing import Any, List, Optional + +from azure.core.credentials import TokenCredential +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobClient, BlobProperties, BlobServiceClient, ContainerClient +from hydra_zen import MISSING, builds + +from vibe_common.constants import DEFAULT_BLOB_ASSET_MANAGER_CONTAINER +from vibe_common.tokens import BlobTokenManagerConnectionString, BlobTokenManagerCredentialed +from vibe_core.file_downloader import download_file +from vibe_core.uri import is_local, local_uri_to_path, uri_to_filename + +from .file_upload import local_upload, remote_upload + 
+CACHE_SIZE = 100 + + +class AssetManager(ABC): + @abstractmethod + def store(self, asset_guid: str, file_path: str) -> str: + raise NotImplementedError + + @abstractmethod + def retrieve(self, asset_guid: str) -> str: + raise NotImplementedError + + @abstractmethod + def exists(self, asset_guid: str) -> bool: + raise NotImplementedError + + @abstractmethod + def remove(self, asset_guid: str) -> None: + raise NotImplementedError + + +class LocalFileAssetManager(AssetManager): + def __init__(self, local_storage_path: str): + self.logger = logging.getLogger(self.__class__.__name__) + self.root_path = local_storage_path + + def store(self, asset_guid: str, src_file_ref: str) -> str: + if self.exists(asset_guid): + self.logger.info(f"Attempted to write repeated entry {asset_guid}.") + return self.retrieve(asset_guid) + + dst_asset_dir = self._gen_path(asset_guid) + try: + os.makedirs(dst_asset_dir) + filename = uri_to_filename(src_file_ref) + dst_filename = os.path.join(dst_asset_dir, filename) + if is_local(src_file_ref): + shutil.copyfile(local_uri_to_path(src_file_ref), dst_filename) + else: + download_file(src_file_ref, dst_filename) + except Exception: + self.logger.exception(f"Exception when storing asset {src_file_ref}/{asset_guid}.") + # Clean up asset directory + try: + shutil.rmtree(dst_asset_dir) + except Exception: + self.logger.exception( + "Exception when cleaning up directory after failing to " + f"store asset with ID {asset_guid}" + ) + raise + raise + return dst_filename + + def retrieve(self, asset_guid: str) -> str: + asset_path = self._gen_path(asset_guid) + if not os.path.exists(asset_path): + msg = f"File with ID {asset_guid} does not exist." 
+ self.logger.error(msg) + raise ValueError(msg) + files_in_asset_folder = os.listdir(asset_path) + + if len(files_in_asset_folder) != 1: + msg = f"Inconsistent content found for asset ID {asset_guid}" + self.logger.error(msg) + raise ValueError(msg) + + file_name = files_in_asset_folder[0] + return os.path.join(asset_path, file_name) + + def exists(self, asset_guid: str) -> bool: + return os.path.exists(self._gen_path(asset_guid)) + + def _gen_path(self, guid: str) -> str: + return os.path.join(self.root_path, guid) + + def remove(self, asset_guid: str) -> None: + if not self.exists(asset_guid): + self.logger.info(f"Asked to remove inexistent file {asset_guid}.") + return + + asset_folder = self._gen_path(asset_guid) + + try: + shutil.rmtree(asset_folder) + except Exception: + msg = f"Could not remove asset with ID {asset_guid}" + self.logger.exception(msg) + raise ValueError(msg) + + +# ATTENTION: if the blob container associated with the assets is modified (through a write or +# delete operation), then we should invalidate the cache of this function by calling its +# cache_clear() method.
+@lru_cache(maxsize=CACHE_SIZE) +def cached_blob_list_by_prefix(client: ContainerClient, guid: str) -> List[BlobProperties]: + return list(client.list_blobs(name_starts_with=guid)) + + +class BlobServiceProvider(ABC): + @abstractmethod + def get_client(self) -> BlobServiceClient: + raise NotImplementedError + + +class BlobServiceProviderWithCredentials(BlobServiceProvider): + def __init__( + self, + storage_account_url: str, + credential: Optional[TokenCredential] = None, + ): + self.credential = DefaultAzureCredential() if credential is None else credential + self.client = BlobServiceClient(storage_account_url, self.credential) + + def get_client(self) -> BlobServiceClient: + return self.client + + +class BlobServiceProviderWithConnectionString(BlobServiceProvider): + def __init__(self, connection_string: str): + self.client = BlobServiceClient.from_connection_string(connection_string) + + def get_client(self) -> BlobServiceClient: + return self.client + + +class BlobAssetManager(AssetManager): + blob_delimiter = "/" + + def __init__( + self, + storage_account_url: str = "", + storage_account_connection_string: str = "", + asset_container_name: str = DEFAULT_BLOB_ASSET_MANAGER_CONTAINER, + credential: Optional[TokenCredential] = None, + max_upload_concurrency: int = 6, + ): + self.logger = logging.getLogger(self.__class__.__name__) + # Create a blob client, authenticated. 
+ self.credential = DefaultAzureCredential() if credential is None else credential + if storage_account_url: + self.client = BlobServiceProviderWithCredentials( + storage_account_url=storage_account_url, credential=self.credential + ).get_client() + self.blob_token_manager = BlobTokenManagerCredentialed(credential=self.credential) + elif storage_account_connection_string: + self.client = BlobServiceProviderWithConnectionString( + connection_string=storage_account_connection_string + ).get_client() + self.blob_token_manager = BlobTokenManagerConnectionString( + connection_string=storage_account_connection_string + ) + else: + msg = ( + "Could not get a blob manager since neither storage account " + "url nor connection string were provided" + ) + self.logger.error(msg) + raise ValueError(msg) + + self.container_name = asset_container_name + self.container = self._retrieve_container() + self.max_upload_concurrency = max_upload_concurrency + + def _retrieve_container(self): + container = self.client.get_container_client(self.container_name) + if not container.exists(): + container.create_container() + + return container + + @staticmethod + def _join(*args: str): + return BlobAssetManager.blob_delimiter.join(args) + + def _list(self, guid: str) -> List[BlobProperties]: + listed_blob = cached_blob_list_by_prefix(self.container, guid) + if len(listed_blob) > 1: + raise ValueError(f"Encountered more than one asset with id {guid}") + + return listed_blob + + def _local_upload(self, file_path: str, blob_client: BlobClient): + # At this point, we expect a valid local path was passed to the file_path + # which can be something like "file:///path/to/file" or "/path/to/file".
+ local_upload(file_path, blob_client, max_concurrency=self.max_upload_concurrency) + + def store(self, asset_guid: str, file_ref: str) -> str: + if self.exists(asset_guid): + self.logger.debug(f"Attempted to write repeated entry {asset_guid}.") + blob_property = self._list(asset_guid)[0] + blob_client = self.container.get_blob_client(blob_property.name) + return blob_client.url + + filename = uri_to_filename(file_ref) + blob_name = self._join(asset_guid, filename) + blob_client = self.container.get_blob_client(blob_name) + + if is_local(file_ref): + upload = self._local_upload + else: + upload = remote_upload + + try: + upload(file_ref, blob_client) + except Exception: + self.logger.exception(f"Exception when storing asset {file_ref}/ ID {asset_guid}.") + raise + + # Clear cache as we know we have modified the blob content + cached_blob_list_by_prefix.cache_clear() + + return blob_client.url + + def retrieve(self, asset_guid: str) -> str: + # Obtains a SAS token for file and creates a URL for it. + if not self.exists(asset_guid): + msg = f"File with ID {asset_guid} does not exist." 
+ self.logger.error(msg) + raise ValueError(msg) + + blob_property = self._list(asset_guid)[0] + blob_client = self.container.get_blob_client(blob_property.name) + + return self.blob_token_manager.sign_url(blob_client.url) + + def exists(self, asset_guid: str) -> bool: + listed_blob = self._list(asset_guid) + return len(listed_blob) == 1 + + def remove(self, asset_guid: str) -> None: + if not self.exists(asset_guid): + self.logger.debug(f"Asked to remove inexistent file {asset_guid}.") + return + + blob_property = self._list(asset_guid)[0] + try: + self.container.delete_blob(blob_property.name) + except Exception: + msg = f"Could not remove asset with ID {asset_guid}" + self.logger.exception(msg) + raise ValueError(msg) + + cached_blob_list_by_prefix.cache_clear() + + +AssetManagerConfig = builds( + AssetManager, + zen_dataclass={ + "module": "vibe_agent.storage.asset_management", + "cls_name": "AssetManagerConfig", + }, +) + + +@dataclass +class BlobAssetManagerConfig(AssetManagerConfig): + _target_: str = "vibe_agent.storage.asset_management.BlobAssetManager" + storage_account_url: Any = MISSING + storage_account_connection_string: Any = MISSING + asset_container_name: Any = MISSING + credential: Any = MISSING + max_upload_concurrency: Any = 6 + + +LocalFileAssetManagerConfig = builds( + LocalFileAssetManager, + populate_full_signature=True, + builds_bases=(AssetManagerConfig,), + zen_dataclass={ + "module": "vibe_agent.storage.asset_management", + "cls_name": "LocalFileAssetManagerConfig", + }, +) diff --git a/src/vibe_agent/vibe_agent/storage/file_upload.py b/src/vibe_agent/vibe_agent/storage/file_upload.py new file mode 100644 index 00000000..7d07fc66 --- /dev/null +++ b/src/vibe_agent/vibe_agent/storage/file_upload.py @@ -0,0 +1,24 @@ +from typing import Any + +from azure.storage.blob import BlobClient + +from vibe_core.uri import is_local, local_uri_to_path + + +def upload_to_blob(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any): + if 
is_local(file_path): + local_upload(file_path, blob_client, *args, **kwargs) + else: + remote_upload(file_path, blob_client, *args, **kwargs) + + +def local_upload(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any): + # At this point, we expect a valid local path was passed to the file_path + # which can be something like "file:///path/to/file" or "/path/to/file". + file_path = local_uri_to_path(file_path) + with open(file_path, "rb") as data: + blob_client.upload_blob(data, *args, **kwargs) + + +def remote_upload(file_path: str, blob_client: BlobClient, *args: Any, **kwargs: Any): + blob_client.upload_blob_from_url(file_path, *args, **kwargs) diff --git a/src/vibe_agent/vibe_agent/storage/local_storage.py b/src/vibe_agent/vibe_agent/storage/local_storage.py new file mode 100644 index 00000000..9178687f --- /dev/null +++ b/src/vibe_agent/vibe_agent/storage/local_storage.py @@ -0,0 +1,193 @@ +import asyncio +import logging +import os +import shutil +from concurrent.futures import Executor +from typing import Any, Dict, List, Optional, Union, cast + +from hydra_zen import MISSING, builds +from pystac.catalog import Catalog, CatalogType +from pystac.collection import Collection, Extent +from pystac.item import Item +from pystac.stac_io import DefaultStacIO + +from vibe_common.schemas import CacheInfo, OpRunId +from vibe_core.utils import ensure_list + +from .asset_management import LocalFileAssetManagerConfig +from .storage import AssetManager, ItemDict, Storage, StorageConfig + + +class LocalStacIO(DefaultStacIO): + def stac_object_from_dict( + self, + d: Dict[str, Any], + href: Optional[str] = None, + root: Optional[Catalog] = None, + preserve_dict: bool = False, + ) -> Any: + return super().stac_object_from_dict(d, href, root, False) + + +class LocalResourceExistsError(RuntimeError): + pass + + +class LocalStorage(Storage): + """ + This class implements the Storage abstract class.
+ """ + + IS_SINGULAR_FIELD = "terravibe_is_singular" + COLLECTION_TYPE = CatalogType.SELF_CONTAINED + CATALOG_TYPE = CatalogType.RELATIVE_PUBLISHED + + def __init__(self, local_path: str, asset_manager: AssetManager): + """ + Initializer expects a directory path where catalogs can be stored + """ + super().__init__(asset_manager) + self.path = local_path + self.logger = logging.getLogger(self.__class__.__name__) + self.stac_io = LocalStacIO() + + def _retrieve_items(self, catalog: Catalog) -> ItemDict: + output: ItemDict = {} + for c in catalog.get_collections(): + output[c.id] = list(c.get_items()) + if c.extra_fields[self.IS_SINGULAR_FIELD]: # type: ignore + output[c.id] = cast(List[Item], output[c.id])[0] + return output + + def _create_output_collection( + self, output_name: str, items: Union[Item, List[Item]] + ) -> Collection: + extra_info: Dict[str, bool] = {self.IS_SINGULAR_FIELD: not isinstance(items, list)} + output_items = ensure_list(items) + extent = Extent.from_items(output_items) + description = f"Stores op output {output_name} for a unique op run." 
+ output_collection = Collection( + id=output_name, + description=description, + extent=extent, + catalog_type=self.COLLECTION_TYPE, + extra_fields=extra_info, + ) + output_collection.add_items(output_items) + + return output_collection + + def retrieve_output_from_input_if_exists( + self, + cache_info: CacheInfo, + ) -> Optional[ItemDict]: + """ + Method to help users to skip computation if the result of the previous outputs from input + and operator combo has been memo-ized as a catalog in the TerraVibes storage system + """ + catalog_path = self.get_catalog_path(cache_info.hash, cache_info.name) + if os.path.exists(catalog_path): + catalog = Catalog.from_file( + os.path.join(catalog_path, Catalog.DEFAULT_FILE_NAME), stac_io=self.stac_io + ) + return self._retrieve_items(catalog) + + return None + + async def retrieve_output_from_input_if_exists_async( + self, cache_info: CacheInfo, **kwargs: Any + ): + executor: Executor = cast(Executor, kwargs["executor"]) + return await asyncio.get_running_loop().run_in_executor( + executor, self.retrieve_output_from_input_if_exists, cache_info + ) + + def create_run_collection( + self, + run_id: str, + catalog_path: str, + items: ItemDict, + extra_info: Dict[str, Any], + ) -> Catalog: + description = f"Collection of outputs of run id {run_id}." 
+ run_catalog = Catalog( + id=run_id, + description=description, + href=catalog_path, + catalog_type=self.CATALOG_TYPE, + extra_fields=extra_info, + ) + for output_name, output_items in items.items(): + output_collection = self._create_output_collection(output_name, output_items) + run_catalog.add_child(output_collection) + + return run_catalog + + def get_catalog_path(self, op_hash: str, op_name: str) -> str: + """ + Each catalog has a directory and json file where the corresponding assets and files are + stored/indexed + """ + return os.path.join(self.path, op_name, op_hash) + + def _catalog_cleanup(self, catalog: Catalog): + catalog_path = catalog.get_self_href() + assert catalog_path is not None, f"Catalog {catalog.id} does not have an href." + catalog.normalize_hrefs(catalog_path) + catalog.make_all_asset_hrefs_relative() + + def store(self, run_id: str, items_to_store: ItemDict, cache_info: CacheInfo) -> ItemDict: + """ + Method to store a given list of items to current TerraVibes storage STAC catalog + This method must be atomic -- that is all of it happens or none of it happens + This method must be consistent -- that is the assets/items referenced by catalogs must be in + storage & vice-versa + This method must be isolated -- applications should be able to call multiple store + operations simultaneously and safely + This method must be durable -- all changes must be available across crashes unless there + is a catastrophic failure + This method must be performant -- it should support 1000s/100s/10s of + assets/catalogs/workflows being updated simultaneously + """ + catalog_path = self.get_catalog_path(cache_info.hash, cache_info.name) + items_to_store = self.asset_handler.copy_assets(items_to_store) + catalog = self.create_run_collection( + run_id, catalog_path, items_to_store, cache_info.as_storage_dict() + ) + self._catalog_cleanup(catalog) + if not os.path.exists(catalog_path): + catalog.save(stac_io=self.stac_io) + else: + raise 
LocalResourceExistsError( + f"Op output already exists in storage for {cache_info.name} with id {run_id}." + ) + + return items_to_store + + def remove(self, op_run_id: OpRunId): + catalog_path = self.get_catalog_path(op_run_id.hash, op_run_id.name) + + if not os.path.exists(catalog_path): + self.logger.info( + f"Asked to remove nonexistent catalog with op name {op_run_id.name} and hash " + f"{op_run_id.hash}." + ) + return + + try: + shutil.rmtree(catalog_path) + except OSError: + self.logger.exception(f"Error removing catalog for op run {op_run_id}.") + raise + + +LocalStorageConfig = builds( + LocalStorage, + local_path=MISSING, + asset_manager=LocalFileAssetManagerConfig(MISSING), + builds_bases=(StorageConfig,), + zen_dataclass={ + "module": "vibe_agent.storage.local_storage", + "cls_name": "LocalStorageConfig", + }, +) diff --git a/src/vibe_agent/vibe_agent/storage/remote_storage.py b/src/vibe_agent/vibe_agent/storage/remote_storage.py new file mode 100644 index 00000000..76aaf973 --- /dev/null +++ b/src/vibe_agent/vibe_agent/storage/remote_storage.py @@ -0,0 +1,298 @@ +import logging +from dataclasses import asdict, dataclass, fields +from functools import lru_cache +from hashlib import sha256 +from math import ceil +from typing import Any, Dict, List, Optional, cast + +from azure.cosmos import ContainerProxy, CosmosClient, PartitionKey +from azure.cosmos.aio import ( + ContainerProxy as AsyncContainerProxy, +) +from azure.cosmos.aio import ( + CosmosClient as AsyncCosmosClient, +) +from azure.cosmos.exceptions import CosmosHttpResponseError, CosmosResourceNotFoundError +from azure.storage.blob import BlobLeaseClient +from hydra_zen import MISSING +from pystac.item import Item + +from vibe_common.constants import ( + DEFAULT_COSMOS_DATABASE_NAME, + DEFAULT_COSMOS_URI, + DEFAULT_STAC_COSMOS_CONTAINER, +) +from vibe_common.schemas import CacheInfo, OpRunId +from vibe_core.utils import ensure_list + +from .asset_management import AssetManager, 
@property
def container_proxy(self):
    """
    Synchronous Cosmos container client, created on first access.

    The first access creates a ``CosmosClient`` from ``cosmos_url`` and ``key``, creates the
    database and the STAC container if they do not exist (partitioned by
    ``PARTITION_KEY``), and remembers the resulting container client on this instance.
    """
    # The client is cached on the instance instead of with functools.lru_cache: an
    # lru_cache on a method keys on ``self`` and keeps every instance (and its key)
    # alive in a module-level cache.
    proxy = getattr(self, "_container_proxy", None)
    if proxy is None:
        cosmos_client = CosmosClient(self.cosmos_url, self.key)
        db = cosmos_client.create_database_if_not_exists(id=self.cosmos_database_name)
        proxy = db.create_container_if_not_exists(
            self.stac_container_name, partition_key=PartitionKey(self.PARTITION_KEY)
        )
        self._container_proxy = proxy
    return proxy
def _store_data(
    self, op_name: str, run_to_store: Dict[str, Any], items_to_store: List[Dict[str, Any]]
):
    """
    Create the item-list documents and then the run document, rolling back on failure.

    Args:
        op_name: Partition key value used when deleting documents during rollback.
        run_to_store: Run document; created last, after every item list.
        items_to_store: Item-list documents; each must carry an ``"id"`` entry.

    Raises:
        Exception: Whatever error interrupted the creation. It is re-raised after the
            rollback so callers always see the original failure.
    """
    container = self._get_container()
    stored_items: List[str] = []
    try:
        for item_list in items_to_store:
            container.create_item(body=item_list)
            stored_items.append(item_list["id"])
        container.create_item(body=run_to_store)
    except Exception:
        # Best-effort rollback: a failing delete must neither stop the remaining
        # deletes nor replace the error that triggered the rollback (``store`` inspects
        # that error to decide whether to retry with smaller lists).
        for item_id in stored_items:
            try:
                container.delete_item(item_id, op_name)
            except Exception:
                self.logger.exception(
                    f"Failed to roll back item list {item_id} of op {op_name}."
                )
        raise
def process_items(self, run_info: RunInfo, retrieved_items: List[Dict[str, Any]]):
    """
    Rebuild the op outputs from the item-list documents read back from the container.

    Each retrieved document is reduced to the fields of ``ItemList`` and its items are
    appended, in retrieval order, to the list of the output it belongs to. The raw
    dictionaries are then converted to STAC items. Outputs named in
    ``run_info.singular_items`` are returned as a single item (the first one), all
    others as lists.
    """
    known_fields = [f.name for f in fields(ItemList)]
    raw_by_output: Dict[str, List[Dict[str, Any]]] = {}
    for document in retrieved_items:
        partition = ItemList(**{k: v for k, v in document.items() if k in known_fields})
        raw_by_output.setdefault(partition.output_name, []).extend(partition.items)

    singular_outputs = run_info.singular_items
    outputs: ItemDict = {}
    for name, raw_items in raw_by_output.items():
        stac_items = [Item.from_dict(raw, preserve_dict=False) for raw in raw_items]
        outputs[name] = stac_items[0] if name in singular_outputs else stac_items
    return outputs
def remove(self, op_run_id: OpRunId):
    """
    Delete the documents stored for an op run: its item lists first, then the run info.

    Nothing is done when no run info exists for the given op name and hash. Documents
    that are already missing from the container are logged as warnings and skipped.
    """
    container = self._get_container()
    run_info = self._get_run_info(op_run_id.name, op_run_id.hash, container)
    if run_info is None:
        return None

    # (document id, partition key, is_item_list) in deletion order; the run document
    # goes last.
    pending = [(item_list_id, run_info.op_name, True) for item_list_id in run_info.items]
    pending.append((op_run_id.hash, op_run_id.name, False))

    for document_id, partition, is_item_list in pending:
        try:
            container.delete_item(document_id, partition)
        except CosmosResourceNotFoundError as er:
            if is_item_list:
                self.logger.warning(
                    f"The item {document_id} that is a part of {op_run_id} does not exist in the "
                    f"Cosmos DB container: {er}"
                )
            else:
                self.logger.warning(
                    f"The item {op_run_id} does not exist in the Cosmos DB container: {er}"
                )
class AssetCopyHandler:
    """Copies the assets referenced by STAC items into storage through an asset manager."""

    def __init__(self, asset_manager: AssetManager):
        self.asset_manager = asset_manager

    def _copy_asset(self, guid: str, asset: Asset):
        """Store one asset and point its href at the value returned by the manager."""
        file_path = asset.get_absolute_href()
        assert file_path is not None, f"Asset {guid} does not have an href."
        asset.href = self.asset_manager.store(guid, file_path)

    def _copy_prepared_assets(self, assets_to_copy: Dict[str, Asset]):
        """
        Copy every asset; if one copy fails, remove the ones already copied and re-raise.

        Args:
            assets_to_copy: Assets keyed by the id passed to the asset manager.

        Raises:
            Exception: The error raised by the failing copy.
        """
        copied_assets: List[str] = []
        try:
            for guid, asset in assets_to_copy.items():
                self._copy_asset(guid, asset)
                copied_assets.append(guid)
        except Exception:
            # Best-effort rollback: a failing removal must not stop the remaining
            # removals nor hide the error that caused the rollback.
            for guid in copied_assets:
                try:
                    self.asset_manager.remove(guid)
                except Exception:
                    import logging

                    logging.getLogger(self.__class__.__name__).exception(
                        f"Failed to remove asset {guid} while rolling back a failed copy."
                    )
            raise

    def _prepare_assets(self, items: ItemDict):
        """Collect the assets of all items into one dict; later keys overwrite earlier ones."""
        assets: Dict[str, Asset] = {}
        for entry in items.values():
            for stac_item in ensure_list(entry):
                assets.update(stac_item.assets)
        return assets

    def copy_assets(self, items: ItemDict):
        """Copy the assets of all items into storage and return the same ``items`` object."""
        assets = self._prepare_assets(items)
        self._copy_prepared_assets(assets)

        return items
def retrieve(self, input_items: ItemDict) -> ItemDict:
    """
    Resolve the assets of the given items against the current TerraVibes storage.

    Every asset href of every item is replaced, in place, by the value the asset manager
    returns for the asset's key. The same ``input_items`` object is returned.
    """
    stac_items = (item for entry in input_items.values() for item in ensure_list(entry))
    for stac_item in stac_items:
        for asset_id, asset in stac_item.assets.items():
            asset.href = self.asset_manager.retrieve(asset_id)

    return input_items
# Structured config for ``Storage`` generated with ``hydra_zen.builds``. The
# ``asset_manager`` entry is itself a config (``AssetManagerConfig``). The ``zen_dataclass``
# options name the generated dataclass ``StorageConfig`` and set its module to
# ``vibe_agent.storage.storage``.
# NOTE(review): ``Storage`` is abstract, so this config presumably only serves as a base
# for the concrete storage configs -- confirm.
StorageConfig = builds(
    Storage,
    asset_manager=AssetManagerConfig,
    zen_dataclass={
        "module": "vibe_agent.storage.storage",
        "cls_name": "StorageConfig",
    },
)
class OpSignalHandler:
    """Signal handler that logs the received signal together with resource usage figures."""

    def __init__(self, logger: logging.Logger):
        self.logger = logger
        # ``struct_rusage`` field name -> human readable description used in the log.
        self.resource_description = {
            "ru_utime": "User time",
            "ru_stime": "System time",
            "ru_maxrss": "Max. Resident Set Size",
            "ru_ixrss": "Shared Memory Size",
            "ru_idrss": "Unshared Memory Size",
            "ru_isrss": "Stack Size",
            "ru_inblock": "Block inputs",
            "ru_oublock": "Block outputs",
        }

    def parse_resources_usage(self, rusages: List[resource.struct_rusage]):
        """Sum each described rusage field over ``rusages``.

        Returns:
            A dict mapping every field name in ``resource_description`` to
            ``{"description": ..., "value": <sum over rusages>}``.
        """
        # ``field`` rather than ``resource`` so the loop variable does not shadow the
        # ``resource`` module.
        return {
            field: {
                "description": description,
                "value": sum(getattr(rusage, field) for rusage in rusages),
            }
            for field, description in self.resource_description.items()
        }

    def build_log_message(self, signum: int, child_pid: Optional[Tuple[int, int]]) -> str:
        """Build the log line for ``signum``.

        Args:
            signum: Number of the received signal.
            child_pid: ``(pid, status)`` of a reaped child as returned by ``os.waitpid``,
                or ``None``. When given, the child is mentioned and the resource usage of
                the children is added to the usage of this process.
        """
        resource_usages = [resource.getrusage(resource.RUSAGE_SELF)]

        if signum == signal.SIGTERM:
            msgs_list = ["Terminating op gracefully with SIGTERM."]
        else:
            msgs_list = [
                f"Received signal when executing op (signal {signal.Signals(signum).name}).",
            ]

        if child_pid:
            pid, exit_code = child_pid
            msgs_list.append(f" Child pid = {pid} exit code = {exit_code >> 8},")
            resource_usages.append(resource.getrusage(resource.RUSAGE_CHILDREN))

        msgs_list.append(f"Op resources = {self.parse_resources_usage(resource_usages)}")

        return " ".join(msgs_list)

    def get_log_function(self, child_pid: Optional[Tuple[int, int]]):
        """Return ``logger.error`` when a reaped child did not exit normally, else ``logger.info``."""
        if child_pid:
            _, exit_code = child_pid
            if not os.WIFEXITED(exit_code):
                return self.logger.error

        return self.logger.info

    def log(self, signum: int, _: Any):
        """Signal handler entry point: reap one child (if any has exited) and log the event."""
        child_pid: Optional[Tuple[int, int]] = None
        try:
            pid, status = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            # That's OK. There is no child process
            pass
        else:
            # With WNOHANG, waitpid returns (0, 0) when children exist but none has
            # changed state. That is not a reaped child and must not be reported as one.
            if pid != 0:
                child_pid = (pid, status)

        message = self.build_log_message(signum, child_pid)
        log_function = self.get_log_function(child_pid)
        log_function(message)
async def send_failure_reply(self, traceparent: str, e: Exception, tb: List[str]) -> None:
    """
    Report a failure by sending an error message built from the given exception.

    Args:
        traceparent: Identifier passed as the first argument of the error message and
            used in the debug log line.
        e: The exception to report; ``str(type(e))`` and ``str(e)`` are sent.
        tb: Formatted traceback lines to include in the error message.
    """
    # The former ``assert type(e) is not None`` was removed: ``type()`` never returns
    # None, so the check could not fail.
    reply = WorkMessageBuilder.build_error(
        traceparent,
        str(type(e)),
        str(e),
        tb,
    )
    await self.send(reply)
    self.logger.debug(f"Sent failure response for {traceparent}")
def pre_stop_hook(self, signum: int, _: Any):
    """
    Start the shutdown sequence; only the first call does anything.

    The first call sets ``shutting_down``, terminates the current child when a message is
    being processed, and stops the app server if it exists. Later calls only log a
    warning. The whole sequence runs while holding ``shutdown_lock``.

    Args:
        signum: Signal number that triggered the shutdown; used in the warning message.
        _: Unused second argument of the signal-handler signature.
    """
    # ``with`` guarantees the lock is released on every path, including when logging
    # or the server stop raises.
    with self.shutdown_lock:
        if self.shutting_down:
            self.logger.warning(
                f"Shutdown requested while already shutting down. Ignoring. (signal: {signum})"
            )
            return
        self.shutting_down = True
        try:
            if self.current_message is not None:
                self._terminate_child()
        finally:
            # Stop the server even if terminating the child failed.
            if self.app._server is not None:
                self.app._server.stop(None)
def get_future_result(
    self, child: ProcessFuture, monitoring_period_s: int, timeout_s: float
) -> Any:
    """
    Wait for the child process running an op, checking on the worker state periodically.

    The result is polled in slices of at most ``monitoring_period_s`` seconds. After
    every slice without a result the method checks whether the workflow has already
    finished and whether the worker is shutting down; in both cases the child is
    cancelled.

    Args:
        child: Future of the child process.
        monitoring_period_s: Maximum time to block between two state checks.
        timeout_s: Total time the op is allowed to run.

    Returns:
        The value produced by the child, or a ``traceback.TracebackException`` describing
        the exception the future raised.

    Raises:
        RuntimeError: The workflow was completed/failed/cancelled while the op was running.
        ShuttingDownException: The worker is shutting down.
        concurrent.futures.CancelledError: The child was cancelled although the worker
            is not shutting down.
        TimeoutError: The op did not finish within ``timeout_s`` seconds.
    """
    # Monotonic clock: elapsed time must not be affected by wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            # Never wait past the deadline, even if the monitoring period is longer.
            return child.result(min(monitoring_period_s, remaining))
        except concurrent.futures.TimeoutError:
            assert self.current_message is not None, (
                "There's a correctness issue in the worker code. "
                "`current_message` should not be `None`."
            )
            if self.is_workflow_complete(self.current_message):
                self.logger.info(
                    f"Workflow {self.current_message.run_id} is complete. "
                    "Terminating child process."
                )
                child.cancel()
                raise RuntimeError(
                    "Workflow was completed/failed/cancelled while running op. "
                    "Terminating child process."
                )
            if self.shutting_down:
                self.logger.info("Shutdown process initiated. Terminating child process.")
                child.cancel()
                raise ShuttingDownException()
            continue
        except concurrent.futures.CancelledError:
            if self.shutting_down:
                raise ShuttingDownException()
            self.logger.warning(
                f"Child process was cancelled while running op {self.current_message}. "
                "But we're not shutting down. This is unexpected."
            )
            raise
        except Exception as e:
            self.logger.exception(f"Child process failed with exception {e}")
            return traceback.TracebackException.from_exception(e)
    # The op ran out of time: cancel the child so it does not keep running after the
    # worker has reported the timeout and moved on.
    child.cancel()
    raise TimeoutError(f"Op execution took longer than the allowed {timeout_s} seconds.")
+ RuntimeError, RuntimeError(f"Couldn't run op {spec} at all (run id: {run_id})"), None + ) + self.logger.info( + f"Will try to execute op {spec} with input {get_input_ids(content.input)} " + f"for at most {self.max_tries} tries in child process." + ) + final_time = time.time() + timeout_s + for i in range(self.max_tries): + inner_timeout = final_time - time.time() + if self.shutting_down: + self.logger.info( + "Stopping execution of op because the shutdown process has been initiated." + ) + raise ShuttingDownException() + try: + ret = self.try_run_op(spec, content, inner_timeout) + if not isinstance(ret, traceback.TracebackException): + self.logger.debug(f"Op {spec} ran successfully on try {i+1} (run id: {run_id})") + break + self.logger.error( + f"Failed to run op {spec} with input {get_input_ids(content.input)} " + f"in subprocess. (try {i+1}/{self.max_tries}) {''.join(ret.format())}" + ) + except ProcessExpired: + self.logger.exception(f"pebble child process failed on try {i+1}/{self.max_tries}") + except TimeoutError as e: + msg = ( + f"Op execution timed out on try {i+1}/{self.max_tries}. " + f"Total time allowed: {timeout_s} seconds. " + f"Last try was allowed to run for {inner_timeout} seconds." 
# Structured config for ``Worker`` generated with ``hydra_zen.builds``.
# ``control_topic`` is ``MISSING`` and therefore has to be provided by whoever uses the
# config; every other entry has a default here.
# NOTE(review): ``port`` defaults to ``settings.GRPC_APP_PORT`` here, while
# ``Worker.__init__`` itself defaults to ``settings.HTTP_APP_PORT`` -- confirm which one is
# intended.
# NOTE(review): ``hydra_recursive=False`` presumably keeps ``factory_spec`` as a config so
# that it is instantiated later (``run_op`` calls ``instantiate(factory_spec)``) -- confirm.
WorkerConfig = builds(
    Worker,
    port=settings.GRPC_APP_PORT,
    pubsubname=CONTROL_STATUS_PUBSUB,
    control_topic=MISSING,
    status_topic=STATUS_PUBSUB_TOPIC,
    max_tries=5,
    termination_grace_period_s=TERMINATION_GRACE_PERIOD_S,
    factory_spec=OperationFactoryConfig,
    zen_partial=False,
    hydra_recursive=False,
    logdir=None,
    max_log_file_bytes=MAX_LOG_FILE_BYTES,
    log_backup_count=LOG_BACKUP_COUNT,
    loglevel=None,
    otel_service_name="",
)
def test_with_feature_geojson():
    """A GeoJSON ``Feature`` yields an item carrying its geometry and the given dates."""
    range_start = datetime.now(timezone.utc)
    range_end = range_start - timedelta(days=6 * 30)

    # Closed ring: the first and last vertices are the same point.
    outer_ring = [
        [-118.675944, 46.916908],
        [-118.675944, 46.79631],
        [-118.841574, 46.79631],
        [-118.841574, 46.916908],
        [-118.675944, 46.916908],
    ]
    polygon = {"type": "Polygon", "coordinates": [outer_ring]}
    test_feature: Dict[str, Any] = {
        "type": "Feature",
        "properties": {"Name": "some_name"},
        "geometry": polygon,
    }

    item = handle_non_collection(test_feature, range_start, range_end)
    properties = item["properties"]

    assert properties["start_datetime"] == range_start.isoformat()
    assert properties["end_datetime"] == range_end.isoformat()
    assert item["geometry"] == test_feature["geometry"]
@patch("vibe_common.input_handlers.handle_non_collection")
def test_with_feature_collection_geojson(mock_handle: Mock):
    """A one-feature collection is unwrapped; a two-feature collection is rejected."""
    range_start = datetime.now(timezone.utc)
    range_end = range_start - timedelta(days=6 * 30)

    outer_ring = [
        [-118.675944, 46.916908],
        [-118.675944, 46.79631],
        [-118.841574, 46.79631],
        [-118.841574, 46.916908],
        [-118.675944, 46.916908],
    ]
    test_feature = {
        "type": "Feature",
        "properties": {"Name": "some_name"},
        "geometry": {"type": "Polygon", "coordinates": [outer_ring]},
    }
    features = [test_feature]
    test_collection: Dict[str, Any] = {
        "type": "FeatureCollection",
        "name": "some_name",
        "features": features,
    }

    # Single feature: the (patched) non-collection handler receives that feature.
    gen_stac_item_from_bounds(test_collection, range_start, range_end)
    mock_handle.assert_called_once_with(test_feature, range_start, range_end)

    # A second feature makes the collection ambiguous, which must be an error.
    features.append(test_feature)
    with pytest.raises(ValueError):
        gen_stac_item_from_bounds(test_collection, range_start, range_end)
def test_error_message_construction(message_header: MessageHeader, traceparent: str):
    """Build an error work message out of a real, caught exception.

    The error content is filled from the exception *instance*: its class name,
    its message and its formatted traceback. Stringifying the exception class
    instead would produce "<class 'ZeroDivisionError'>" rather than the message.
    """
    try:
        1 / 0  # type: ignore
    except ZeroDivisionError as exc:
        content = ErrorContent(
            status=OpStatusType.failed,
            ename=type(exc).__name__,
            evalue=str(exc),
            traceback=tb.format_tb(exc.__traceback__),
        )
    # The header fixture is created as an execute_request; switch it so that the
    # header type matches the error content being attached.
    message_header.type = MessageType.error
    build_work_message(header=message_header, content=content, traceparent=traceparent)
@pytest.mark.anyio
async def test_store_fails_with_invalid_input():
    """Storing a non-finite float (inf, -inf, nan) must raise ``ValueError``."""
    state = StateStore()
    non_finite = tuple(map(float, ("inf", "-inf", "nan")))
    for bad_value in non_finite:
        with pytest.raises(ValueError):
            await state.store("key", bad_value)
@pytest.mark.anyio
async def test_state_store_response_deserialize_floats():
    """Coordinates sent as JSON strings come back from ``response_json`` as floats."""
    lat = -52.6324171000924
    lon = -7.241144827812494

    # The payload carries the coordinates as *strings*, quoted inside the JSON.
    raw_body = '{{"geojson": {{"coordinates": ["{0}", "{1}"]}}}}'.format(lat, lon).encode()
    test_response = MockResponse(raw_body)

    client = VibeDaprClient()
    decoded = await client.response_json(test_response)  # type: ignore
    coordinates = decoded["geojson"]["coordinates"]
    lat_out = coordinates[0]
    lon_out = coordinates[1]

    assert isinstance(lat_out, float)
    assert isinstance(lon_out, float)
    assert lat_out == lat
    assert lon_out == lon
Final[str] = "stac-cosmos-write-key" +DEFAULT_COSMOS_URI: Final[str] = "" +DEFAULT_SECRET_STORE_NAME: Final[str] = "azurekeyvault" + +CONTROL_STATUS_PUBSUB: Final[str] = "control-pubsub" +CONTROL_PUBSUB_TOPIC: Final[str] = "commands" +CACHE_PUBSUB_TOPIC: Final[str] = "cache-commands" +STATUS_PUBSUB_TOPIC: Final[str] = "updates" + +TRACEPARENT_VERSION: Final[str] = "00" +TRACEPARENT_FLAGS: Final[int] = 1 + +TRACE_FORMAT: Final[str] = "032x" +SPAN_FORMAT: Final[str] = "016x" +FLAGS_FORMAT: Final[str] = "02x" + +TRACEPARENT_STRING = ( + f"{TRACEPARENT_VERSION}-{{trace_id:{TRACE_FORMAT}}}" + f"-{{parent_id:{SPAN_FORMAT}}}-{{trace_flags:{FLAGS_FORMAT}}}" +) +TRACEPARENT_HEADER_KEY: Final[str] = "Traceparent" + +WORKFLOW_ARTIFACTS_PUBSUB_TOPIC: Final[str] = "workflow-artifacts-commands" +WORKFLOW_REQUEST_PUBSUB_TOPIC: Final[str] = "workflow_execution_request" +STATE_URL_PATH = "/v1.0/state" +STATE_URL_TEMPLATE: Final[str] = ( + f"http://{settings.DAPR_RUNTIME_HOST}:{settings.DAPR_HTTP_PORT}{STATE_URL_PATH}" "/{}/{}" +) +PUBSUB_URL_TEMPLATE: Final[str] = "http://{}:{}/v1.0/publish/{}/{}" +PUBSUB_WORKFLOW_URL: Final[str] = PUBSUB_URL_TEMPLATE.format( + cast(str, settings.DAPR_RUNTIME_HOST), + cast(str, settings.DAPR_HTTP_PORT), + CONTROL_STATUS_PUBSUB, + WORKFLOW_REQUEST_PUBSUB_TOPIC, +) +SERVICE_INVOCACATION_URL_PATH = "/v1.0/invoke" +DATA_OPS_INVOKE_URL_TEMPLATE: Final[str] = ( + f"http://{settings.DAPR_RUNTIME_HOST}:{settings.DAPR_HTTP_PORT}" + f"{SERVICE_INVOCACATION_URL_PATH}/terravibes-data-ops/method/" + "{}/{}" +) + +RUNS_KEY: Final[str] = "runs" +ALLOWED_ORIGINS: Final[List[str]] = [ + o + for o in os.getenv( + "ALLOWED_ORIGINS", + "http://localhost:3000," + "http://localhost," + "http://127.0.0.1:8080," + "http://127.0.0.1:3000,", + ).split(",") + if o +] + +MAX_PARALLEL_REQUESTS: Final[int] = 8 + +HERE = os.path.dirname(os.path.abspath(__file__)) +DEFAULT_OPS_DIR = os.path.abspath(os.path.join(HERE, "..", "..", "..", "ops")) +if not 
def dapr_ready_decorator(
    func: Callable[..., Any], dapr_wait_time_s: int = DAPR_WAIT_TIME_S
) -> Callable[..., Any]:
    """Wrap ``func`` so that each call first waits for dapr to be ready.

    On every invocation the wrapper opens a ``DaprClient``, calls
    ``wait(dapr_wait_time_s)`` on it and only then runs ``func``. The client
    context stays open for the duration of the wrapped call.

    Args:
        func: Plain function or coroutine function to wrap.
        dapr_wait_time_s: Value passed to ``DaprClient.wait``.

    Returns:
        A coroutine function when ``func`` is one, otherwise a plain function.

    Raises:
        Exception: Whatever ``DaprClient.wait`` raises is logged and re-raised.
    """

    def wait_until_ready(client: DaprClient) -> None:
        # Shared by both wrappers so the sync and async paths log identically.
        log = logging.getLogger(f"{__name__}.wait_dapr")
        log.info(f"Waiting {dapr_wait_time_s} seconds for dapr to be ready")
        try:
            client.wait(dapr_wait_time_s)
        except Exception:
            log.exception("dapr is not ready")
            raise
        log.info("dapr is ready.")

    if not asyncio.iscoroutinefunction(func):

        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any):
            with DaprClient() as dapr_client:
                wait_until_ready(dapr_client)
                return func(*args, **kwargs)

        return sync_wrapper

    @wraps(func)
    async def async_wrapper(*args: Any, **kwargs: Any):
        with DaprClient() as dapr_client:
            wait_until_ready(dapr_client)
            return await func(*args, **kwargs)

    return async_wrapper
async def process_dapr_response(response: ClientResponse) -> ClientResponse:
    """Dispatch a Dapr HTTP response to the matching status-code handler.

    The handler is chosen from the response URL:

    * host differs from ``settings.DAPR_RUNTIME_HOST``: a warning is logged and
      the response is returned untouched;
    * path starts with ``STATE_URL_PATH``: ``process_dapr_state_response``;
    * path starts with ``SERVICE_INVOCACATION_URL_PATH``:
      ``process_dapr_service_invocation_response``;
    * anything else: a warning is logged and the response is returned untouched.

    Args:
        response: The aiohttp response to inspect.

    Returns:
        The same response object, after the selected handler has checked it.

    Raises:
        RuntimeError, KeyError: Propagated from the selected handler.
    """
    if response.url.host != settings.DAPR_RUNTIME_HOST:
        # f-string is required here: without it the braces are logged literally.
        logging.warning(f"This url is not a response from Dapr: {response.url.host}")
        return response

    if response.url.path.startswith(STATE_URL_PATH):
        return process_dapr_state_response(response)
    elif response.url.path.startswith(SERVICE_INVOCACATION_URL_PATH):
        return await process_dapr_service_invocation_response(response)
    else:
        logging.warning(
            "We only handle Dapr responses for state management and service invocation. "
            f"Response URL = {response.url}"
        )
        return response
def add_subscription(
    self,
    handler: Callable[..., Union[TopicEventResponse, Coroutine[Any, Any, Any]]],
    pubsub: str,
    topic: str,
    metadata: Optional[Dict[str, str]] = None,
):
    """Register ``handler`` for a pubsub topic and record the subscription.

    The handler is exposed as a POST route at ``/events/{pubsub}/{topic}`` and a
    matching entry is appended to ``self.subscriptions``.

    Args:
        handler: Callable invoked with the incoming event payload.
        pubsub: Name of the pubsub component.
        topic: Topic to subscribe to.
        metadata: Optional subscription metadata. ``None`` is recorded as an
            empty dict.
    """
    event_handler_route = f"/events/{pubsub}/{topic}"
    self.app.add_api_route(
        event_handler_route,
        handler,  # type: ignore
        methods=["POST"],
        response_model=Any,
    )

    self.subscriptions.append(
        {
            "pubsubname": pubsub,
            "topic": topic,
            "route": event_handler_route,
            # Store a private copy: a shared default dict (or the caller's own
            # dict) must not be aliased by several subscription records.
            "metadata": dict(metadata) if metadata is not None else {},
        }
    )
event.SetData(request["data_base64"]) + event.SetContentType(request["datacontenttype"]) + try: + return func(event) + except RuntimeError: + return TopicEventResponseStatus.retry + except Exception: + return TopicEventResponseStatus.drop + + self.add_subscription(event_wrapper, pubsub, topic, metadata) + + return decorator + + def method(self, name: str): + def decorator(func): # type: ignore + route = f"/{name}" + self.app.add_api_route( + route, + func, + methods=["GET", "POST"], + response_model=Any, + ) + + return decorator + + def startup(self): + def decorator(func: Callable[[], None]): + self.app.add_event_handler("startup", func) + + return decorator + + def shutdown(self): + def decorator(func): # type: ignore + self.app.add_event_handler("shutdown", func) + + return decorator + + def health(self, endpoint: str = "/health"): + def decorator(func): # type: ignore + self.app.add_api_route( + endpoint, + func, + methods=["GET"], + response_model=Any, + ) + + return decorator + + def run( + self, + port: int, + limit_concurrency: Optional[int] = None, + ): + config = uvicorn.Config( + self.app, + host="127.0.0.1", + port=port, + log_config=None, + limit_concurrency=limit_concurrency, + ) + self.server = uvicorn.Server(config) + self.server.run() # type: ignore + + async def run_async( + self, + port: int, + limit_concurrency: Optional[int] = None, + workers: int = 1, + ): + config = uvicorn.Config( + self.app, + host="127.0.0.1", + port=port, + log_config=None, + limit_concurrency=limit_concurrency, + loop="uvloop", + workers=workers, + ) + self.server = uvicorn.Server(config) + await self.server.serve() diff --git a/src/vibe_common/vibe_common/input_handlers.py b/src/vibe_common/vibe_common/input_handlers.py new file mode 100644 index 00000000..66a873ba --- /dev/null +++ b/src/vibe_common/vibe_common/input_handlers.py @@ -0,0 +1,61 @@ +from datetime import datetime +from typing import Any, Dict + +from vibe_core.data import DataVibe, StacConverter, 
def gen_stac_item_from_bounds(
    geojson_dict: Dict[str, Any], start_date: datetime, end_date: datetime
) -> Dict[str, Any]:
    """Build a STAC item dict from a GeoJSON object and a date range.

    A ``FeatureCollection`` must hold exactly one feature, which is unwrapped
    and passed to ``handle_non_collection``. Any other GeoJSON type is passed
    to ``handle_non_collection`` as is.

    Args:
        geojson_dict: GeoJSON object; its ``"type"`` key selects the path taken.
        start_date: Start of the time range, forwarded unchanged.
        end_date: End of the time range, forwarded unchanged.

    Returns:
        Whatever ``handle_non_collection`` returns for the selected object.

    Raises:
        ValueError: If a feature collection has no features or more than one,
            or if ``handle_non_collection`` rejects the object.
    """
    if geojson_dict["type"] != "FeatureCollection":
        return handle_non_collection(geojson_dict, start_date, end_date)

    features = geojson_dict["features"]
    if not features:
        # Without this guard an empty collection would surface as a bare
        # IndexError instead of an invalid-input error.
        raise ValueError(
            "Exactly one feature is required as input to a workflow, found "
            "0 features in feature collection"
        )
    if len(features) > 1:
        raise ValueError(
            f"Only one feature is currently supported as input to a workflow, found "
            f"{len(features)} features in feature collection"
        )
    return handle_non_collection(features[0], start_date, end_date)
+from enum import auto +from random import getrandbits +from typing import ( + Any, + Awaitable, + Callable, + Dict, + Final, + List, + Literal, + Optional, + Set, + Type, + Union, + cast, + get_args, + get_type_hints, + overload, +) +from uuid import UUID + +import aiohttp +import requests +from cloudevents.sdk.event import v1 +from dapr.clients.grpc._response import TopicEventResponse +from dapr.conf import settings +from fastapi_utils.enums import StrEnum +from pydantic import BaseModel as PyBaseModel +from pydantic import Field, ValidationError, validator +from pystac.item import Item + +import vibe_common.telemetry as telemetry +from vibe_core.data.core_types import OpIOType +from vibe_core.data.utils import get_base_type, is_container_type, serialize_stac +from vibe_core.datamodel import decode, encode +from vibe_core.utils import get_input_ids + +from .constants import ( + CACHE_PUBSUB_TOPIC, + CONTROL_PUBSUB_TOPIC, + PUBSUB_URL_TEMPLATE, + STATUS_PUBSUB_TOPIC, + TRACEPARENT_FLAGS, + TRACEPARENT_STRING, + WORKFLOW_REQUEST_PUBSUB_TOPIC, +) +from .dropdapr import TopicEventResponse as HttpTopicEventResponse +from .schemas import CacheInfo, OperationSpec + +CLOUDEVENTS_JSON: Final[str] = "application/cloudevents+json" +OCTET_STREAM: Final[str] = "application/octet-stream" +MAXIMUM_MESSAGE_SIZE: Final[int] = 256 * 1024 + +MessageContent = Union[ + "AckContent", + "CacheInfoExecuteRequestContent", + "ExecuteRequestContent", + "ExecuteReplyContent", + "ErrorContent", + "WorkflowExecutionContent", + "EvictedReplyContent", + "WorkflowCancellationContent", + "WorkflowDeletionContent", +] +ValidVersion = Literal["1.0"] + + +class OpStatusType(StrEnum): + done = auto() + failed = auto() + + +class MessageType(StrEnum): + ack = auto() + cache_info_execute_request = auto() + error = auto() + execute_request = auto() + execute_reply = auto() + evicted_reply = auto() + workflow_execution_request = auto() + workflow_cancellation_request = auto() + workflow_deletion_request 
= auto() + + +class BaseModel(PyBaseModel): + class Config: + json_encoders = {Item: serialize_stac} + + +class MessageHeader(BaseModel): + type: MessageType + run_id: UUID + id: str = "" + parent_id: str = "" + current_trace_parent: str = "" + version: ValidVersion = "1.0" + created_at: datetime = Field(default_factory=datetime.now) + + @validator("id", always=True) + def set_id(cls, value: str, values: Dict[str, Any]): + return value or gen_traceparent(values["run_id"]) + + +class ExecuteRequestContent(BaseModel): + input: OpIOType + operation_spec: OperationSpec + + def __str__(self): + return ( + f"{self.__class__.__name__}" + f"(operation_spec={self.operation_spec}, " + f"input={get_input_ids(self.input)})" + ) + + +class CacheInfoExecuteRequestContent(ExecuteRequestContent): + cache_info: CacheInfo + + def __str__(self): + return ( + f"{self.__class__.__name__}" + f"(operation_spec={self.operation_spec}, " + f"input={get_input_ids(self.input)}, " + f"cache_info={self.cache_info})" + ) + + +class ExecuteReplyContent(BaseModel): + cache_info: CacheInfo + status: OpStatusType + output: OpIOType + + +class AckContent(BaseModel): + pass + + +class EvictedReplyContent(BaseModel): + pass + + +class ErrorContent(BaseModel): + status: OpStatusType + ename: str + evalue: str + traceback: List[str] + + +class WorkflowExecutionContent(BaseModel): + input: OpIOType + workflow: Dict[str, Any] + parameters: Optional[Dict[str, Any]] + + def __str__(self): + return ( + f"{self.__class__.__name__}(workflow={self.workflow}, parameters={self.parameters}, " + f"input={get_input_ids(self.input)})" + ) + + +class WorkflowCancellationContent(BaseModel): + pass + + +class WorkflowDeletionContent(BaseModel): + pass + + +class BaseMessage(BaseModel): + header: MessageHeader + content: MessageContent + _supported_channels: Set[str] + + class Config: + # VibeType is not JSON serializable, so we need to convert + # it to string, and convert it back when we receive the + # message + 
def to_cloud_event(self, source: str) -> Dict[str, Any]:
    """Converts this message to a CloudEvents 1.0 dict representation.

    Params:
        source: str
            From the spec: The "source" is the context in which the
            occurrence happened. We should use the name of the TerraVibes
            component that created this message.

    Returns:
        A dict with the CloudEvents attributes. "data" holds the encoded JSON
        dump of this message; "id", "traceparent" and "traceid" all carry the
        message id.

    Raises:
        ValueError: The message is dumped with ``allow_nan=False``, so NaN or
            infinite float values in the content fail serialization.

    For details, please see the specification at
    https://github.com/cloudevents/spec/blob/v1.0/spec.md
    """
    # Local import: this module only imports `datetime` from the datetime package.
    from datetime import timezone

    # The trailing "Z" in RFC3339 means UTC, so the clock must be read in UTC;
    # a naive local timestamp with "Z" appended is wrong on any non-UTC host.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    return {
        "specversion": "1.0",
        "datacontenttype": CLOUDEVENTS_JSON,
        "type": f"ai.terravibes.work.{self.header.type}",
        "source": source,
        "data": encode(self.json(allow_nan=False)),
        "time": timestamp,  # RFC3339 time
        "subject": f"{self.header.type}-{self.header.id}",
        "id": self.id,
        "traceparent": self.id,
        "traceid": self.id,
    }
class WorkMessageBuilder:
    """Static factory helpers that pair a ``MessageHeader`` with the matching content.

    Every builder returns the concrete message class for the message type it
    sets on the header.
    """

    @staticmethod
    def build_execute_request(
        run_id: UUID,
        traceparent: str,
        op_spec: OperationSpec,
        input: OpIOType,
    ) -> WorkMessage:
        """Build an ``execute_request`` message asking to run ``op_spec`` on ``input``.

        ``traceparent`` is stored as the header's ``parent_id``.
        """
        header = MessageHeader(
            type=MessageType.execute_request,
            run_id=run_id,
            parent_id=traceparent,
        )
        content = ExecuteRequestContent(input=input, operation_spec=op_spec)
        return ExecuteRequestMessage(header=header, content=content)

    @staticmethod
    def add_cache_info_to_execute_request(
        execute_request_message: ExecuteRequestMessage, cache_info: CacheInfo
    ) -> WorkMessage:
        """Build a ``cache_info_execute_request`` message from an execute request.

        The input and operation spec are taken from ``execute_request_message``
        and ``cache_info`` is attached to the new content.
        """
        # NOTE: the header object of the incoming message is reused and its
        # type is overwritten in place, so the incoming message's header
        # changes as well.
        header = execute_request_message.header
        header.type = MessageType.cache_info_execute_request
        content = CacheInfoExecuteRequestContent(
            input=execute_request_message.content.input,
            operation_spec=execute_request_message.content.operation_spec,
            cache_info=cache_info,
        )
        return CacheInfoExecuteRequestMessage(header=header, content=content)

    @staticmethod
    def build_workflow_request(
        run_id: UUID,
        workflow: Dict[str, Any],
        parameters: Optional[Dict[str, Any]],
        input: OpIOType,
    ) -> WorkMessage:
        """Build a ``workflow_execution_request`` message for run ``run_id``."""
        header = MessageHeader(type=MessageType.workflow_execution_request, run_id=run_id)
        content = WorkflowExecutionContent(input=input, workflow=workflow, parameters=parameters)
        return WorkflowExecutionMessage(header=header, content=content)

    @staticmethod
    def build_workflow_cancellation(run_id: UUID) -> WorkMessage:
        """Build a ``workflow_cancellation_request`` message for run ``run_id``."""
        header = MessageHeader(type=MessageType.workflow_cancellation_request, run_id=run_id)
        content = WorkflowCancellationContent()
        return WorkflowCancellationMessage(header=header, content=content)

    @staticmethod
    def build_workflow_deletion(run_id: UUID) -> WorkMessage:
        """Build a ``workflow_deletion_request`` message for run ``run_id``."""
        header = MessageHeader(type=MessageType.workflow_deletion_request, run_id=run_id)
        content = WorkflowDeletionContent()
        return WorkflowDeletionMessage(header=header, content=content)

    @staticmethod
    def build_execute_reply(
        traceparent: str, cache_info: CacheInfo, output: OpIOType
    ) -> WorkMessage:
        """Build an ``execute_reply`` message with status ``done`` carrying ``output``.

        The run id is recovered from ``traceparent``, which is also stored as
        the header's ``parent_id``.
        """
        run_id = run_id_from_traceparent(traceparent)
        header = MessageHeader(type=MessageType.execute_reply, run_id=run_id, parent_id=traceparent)
        content = ExecuteReplyContent(
            cache_info=cache_info, status=OpStatusType.done, output=output
        )
        return ExecuteReplyMessage(header=header, content=content)

    @staticmethod
    def build_error(traceparent: str, ename: str, evalue: str, traceback: List[str]) -> WorkMessage:
        """Build an ``error`` message with status ``failed`` describing an exception.

        The run id is recovered from ``traceparent``, which is also stored as
        the header's ``parent_id``.
        """
        run_id = run_id_from_traceparent(traceparent)
        header = MessageHeader(type=MessageType.error, run_id=run_id, parent_id=traceparent)
        content = ErrorContent(
            status=OpStatusType.failed, ename=ename, evalue=evalue, traceback=traceback
        )
        return ErrorMessage(header=header, content=content)

    @staticmethod
    def build_evicted_reply(traceparent: str) -> WorkMessage:
        """Build an ``evicted_reply`` message for the run encoded in ``traceparent``."""
        run_id = run_id_from_traceparent(traceparent)
        header = MessageHeader(type=MessageType.evicted_reply, run_id=run_id, parent_id=traceparent)
        content = EvictedReplyContent()
        return EvictedReplyMessage(header=header, content=content)

    @staticmethod
    def build_ack_reply(traceparent: str) -> WorkMessage:
        """Build an ``ack`` message for the run encoded in ``traceparent``."""
        run_id = run_id_from_traceparent(traceparent)
        header = MessageHeader(type=MessageType.ack, run_id=run_id, parent_id=traceparent)
        content = AckContent()
        return AckMessage(header=header, content=content)


# Content class expected for each message type; used to validate message
# content and to rebuild content objects from received event data.
MESSAGE_TYPE_TO_CONTENT_TYPE: Dict[MessageType, Type[MessageContent]] = {
    MessageType.ack: AckContent,
    MessageType.cache_info_execute_request: CacheInfoExecuteRequestContent,
    MessageType.error: ErrorContent,
    MessageType.evicted_reply: EvictedReplyContent,
    MessageType.execute_reply: ExecuteReplyContent,
    MessageType.execute_request: ExecuteRequestContent,
    MessageType.workflow_execution_request: WorkflowExecutionContent,
    MessageType.workflow_cancellation_request: WorkflowCancellationContent,
    MessageType.workflow_deletion_request: WorkflowDeletionContent,
}


def build_work_message(
    header: MessageHeader, content: MessageContent, traceparent: Optional[str] = None
) -> WorkMessage:
    """Instantiate the first ``WorkMessage`` member that accepts ``header`` and ``content``.

    The members of the ``WorkMessage`` union are tried in declaration order.
    When ``traceparent`` is given it overwrites the ``parent_id`` of the
    resulting message's header.

    Raises:
        ValidationError: The error raised by the last candidate class when no
            class accepts the header/content pair.
    """
    error = None
    for cls in get_args(WorkMessage):
        try:
            ret = cls(header=header, content=content)
            if traceparent is not None:
                ret.header.parent_id = traceparent
            return ret
        except ValidationError as e:
            error = e
    # The union is not empty, so reaching this point means a candidate failed.
    assert error is not None
    raise error


def extract_event_data(event: v1.Event) -> Dict[str, Any]:
    """Decode the payload carried by ``event`` into a dictionary.

    Raises:
        ValueError: If ``event.data`` is neither ``bytes`` nor ``str``.
    """
    logger = logging.getLogger(f"{__name__}.extract_event_data")
    if not isinstance(event.data, (bytes, str)):
        logger.error("Received data is not a byte stream nor a string.")
        # The message is an f-string so the offending data is actually reported.
        raise ValueError(f"Unable to decode event data {event.data}")
    try:
        # dapr tries to encode our already-encoded string
        data = json.loads(decode(json.loads(event.data)))
    except json.decoder.JSONDecodeError:
        # The payload was not JSON-wrapped: decode the raw text directly.
        data = json.loads(
            decode(event.data if isinstance(event.data, str) else event.data.decode())
        )

    return data


def event_to_work_message(event: v1.Event) -> WorkMessage:
    """Rebuild a work message from the ``header`` and ``content`` entries of an event."""
    data = extract_event_data(event)
    header = MessageHeader(**data["header"])
    # The header type selects which content class parses the content entry.
    content = MESSAGE_TYPE_TO_CONTENT_TYPE[header.type](**data["content"])
    return build_work_message(header, content)


def extract_message_header_from_event(event: v1.Event) -> MessageHeader:
    """Return only the ``MessageHeader`` stored in the event payload."""
    extracted_data = extract_event_data(event)
    return MessageHeader(**extracted_data["header"])


def send(message: WorkMessage, source: str, pubsubname: str, topic: str) -> bool:
    """Publish ``message`` as a CloudEvent with a blocking HTTP POST.

    The URL is built from ``PUBSUB_URL_TEMPLATE`` with the configured runtime
    host and HTTP port, ``pubsubname`` and ``topic``. The message's current
    trace parent is refreshed before sending.

    Returns:
        ``response.ok`` of the HTTP response.

    Raises:
        Exception: Any error raised while sending is logged and re-raised.
    """
    message.update_current_trace_parent()
    logger = logging.getLogger(f"{__name__}.send")
    try:
        logger.debug(
            f"Sending message with header {message.header} from "
            f"{source} to pubsub {pubsubname}, topic {topic}"
        )
        response = requests.post(
            PUBSUB_URL_TEMPLATE.format(
                cast(str, settings.DAPR_RUNTIME_HOST),
                cast(str, settings.DAPR_HTTP_PORT),
                pubsubname,
                topic,
            ),
            json=message.to_cloud_event(source),
            headers={
                "Content-Type": CLOUDEVENTS_JSON,
                "traceparent": message.id,
            },
        )
        request_body_length = (
            len(response.request.body) if response.request and response.request.body else 0
        )
        logger.debug(
            f"Last request to pubsub {pubsubname} topic {topic} had "
            f"status code {response.status_code} and body length {request_body_length} bytes"
        )
        if request_body_length > MAXIMUM_MESSAGE_SIZE:
            logger.warning(
                f"Last request to pubsub {pubsubname} topic {topic} exceeded "
                f"maximum safe message size of {MAXIMUM_MESSAGE_SIZE} bytes. "
                f"The message might have been dropped by the message broker."
            )
        return response.ok
    except Exception:
        logger.exception(
            f"Failed to send payload {message} from {source} to pubsub {pubsubname}, topic {topic}"
        )
        raise


async def send_async(message: WorkMessage, source: str, pubsubname: str, topic: str) -> bool:
    """Publish ``message`` as a CloudEvent using an ``aiohttp`` client session.

    Asynchronous counterpart of ``send``: same URL, headers and logging. The
    body length is measured on the JSON-serialized payload.

    Returns:
        ``response.ok`` of the HTTP response.

    Raises:
        Exception: Any error raised while sending is logged and re-raised.
    """
    message.update_current_trace_parent()
    logger = logging.getLogger(f"{__name__}.send_async")
    try:
        logger.debug(
            f"Sending async message with header {message.header} from "
            f"{source} to pubsub {pubsubname}, topic {topic}"
        )
        async with aiohttp.ClientSession() as session:
            payload = message.to_cloud_event(source)
            async with await session.post(
                PUBSUB_URL_TEMPLATE.format(
                    cast(str, settings.DAPR_RUNTIME_HOST),
                    cast(str, settings.DAPR_HTTP_PORT),
                    pubsubname,
                    topic,
                ),
                json=payload,
                headers={
                    "Content-Type": CLOUDEVENTS_JSON,
                    "traceparent": message.id,
                },
            ) as response:
                request_body_length = len(json.dumps(payload).encode("utf-8"))
                logger.debug(
                    f"Last request to pubsub {pubsubname} topic {topic} had "
                    f"status code {response.status} and body length {request_body_length} bytes"
                )
                if request_body_length > MAXIMUM_MESSAGE_SIZE:
                    logger.warning(
                        f"Last request to pubsub {pubsubname} topic {topic} exceeded "
                        f"maximum safe message size of {MAXIMUM_MESSAGE_SIZE} bytes. "
                        f"The message might have been dropped by the message broker."
                    )
                return response.ok
    except Exception:
        logger.exception(
            f"Failed to send payload {message} from {source} to pubsub {pubsubname}, topic {topic}"
        )
        raise


def operation_spec_serializer(spec: OperationSpec) -> Dict[str, Any]:
    """Convert ``spec`` to a dict whose input/output type entries are type-name strings.

    Container types are rendered as ``List[<base type name>]``; every other
    type is rendered as the name of its base type.
    """
    opdict = asdict(spec)
    for field in "inputs_spec output_spec".split():
        if field not in opdict:
            continue
        for k, v in opdict[field].items():
            if is_container_type(v):
                base = get_base_type(v)
                v = f"List[{base.__name__}]"
            else:
                v = get_base_type(v).__name__
            # Existing keys are overwritten, so the dict size is unchanged
            # while iterating over it.
            opdict[field][k] = str(v)
    return opdict


def gen_traceparent(run_id: UUID) -> str:
    """Generates a unique identifier that can be used as W3C traceparent header.

    The trace id is the integer value of ``run_id`` and the parent id is a
    random 64-bit number; both are rendered through ``TRACEPARENT_STRING``.

    See https://www.w3.org/TR/trace-context/#examples-of-http-traceparent-headers for examples.
    """
    trace_id = int(run_id.hex, 16)
    parent_id = getrandbits(64)

    return TRACEPARENT_STRING.format(
        trace_id=trace_id, parent_id=parent_id, trace_flags=TRACEPARENT_FLAGS
    )


def run_id_from_traceparent(traceparent: str) -> UUID:
    """Given the contents of a TerraVibes traceparent header, extracts a run_id from it.

    The run id is the second ``-``-separated field of ``traceparent``.
    """

    return UUID(traceparent.split("-")[1])


@overload
def accept_or_fail_event(
    event: v1.Event,
    success_callback: Callable[[WorkMessage], HttpTopicEventResponse],
    failure_callback: Callable[[v1.Event, Exception, List[str]], HttpTopicEventResponse],
) -> HttpTopicEventResponse: ...


@overload
def accept_or_fail_event(
    event: v1.Event,
    success_callback: Callable[[WorkMessage], TopicEventResponse],
    failure_callback: Callable[[v1.Event, Exception, List[str]], TopicEventResponse],
) -> TopicEventResponse: ...
def accept_or_fail_event(
    event: v1.Event,
    success_callback: Callable[[WorkMessage], Union[HttpTopicEventResponse, TopicEventResponse]],
    failure_callback: Callable[
        [v1.Event, Exception, List[str]], Union[HttpTopicEventResponse, TopicEventResponse]
    ],
):
    """Parse ``event`` into a work message and dispatch it to the right callback.

    Args:
        event: The received event.
        success_callback: Called with the parsed message; its result is returned.
        failure_callback: Called with the event, the raised exception and the
            formatted traceback when parsing or ``success_callback`` fails;
            its result is returned.

    Returns:
        The result of whichever callback completed. If ``failure_callback``
        itself raises, a ``"drop"`` response is returned; its class is taken
        from the return annotation of ``success_callback`` and defaults to
        ``HttpTopicEventResponse``.
    """
    logger = logging.getLogger(f"{__name__}.accept_or_fail_event")
    try:
        message = event_to_work_message(event)
        logger.info(f"Received message: header={message.header}")
        return success_callback(message)
    except Exception as e:
        logger.exception(f"Failed to process event with id {event.id}")
        try:
            return failure_callback(event, e, traceback.format_tb(e.__traceback__))
        except Exception:
            # Keep the traceback of the failure callback in the log instead of
            # discarding it: the cause is not necessarily a traceparent issue.
            logger.exception(
                f"Unable to parse traceparent. Discarding event with id {event.id}"
            )

    # Reached only when the failure callback raised: drop the event.
    try:
        response_type = get_type_hints(success_callback).get("return", HttpTopicEventResponse)
    except Exception:
        # get_type_hints rejects callables without annotations support (e.g.
        # functools.partial objects); fall back to the default response class
        # so that the event is still dropped instead of raising here.
        response_type = HttpTopicEventResponse
    return response_type("drop")


@overload
async def accept_or_fail_event_async(
    event: v1.Event,
    success_callback: Callable[[WorkMessage], Awaitable[HttpTopicEventResponse]],
    failure_callback: Callable[[v1.Event, Exception, List[str]], Awaitable[HttpTopicEventResponse]],
) -> HttpTopicEventResponse: ...


@overload
async def accept_or_fail_event_async(
    event: v1.Event,
    success_callback: Callable[[WorkMessage], Awaitable[TopicEventResponse]],
    failure_callback: Callable[[v1.Event, Exception, List[str]], Awaitable[TopicEventResponse]],
) -> TopicEventResponse: ...
+ + +async def accept_or_fail_event_async( + event: v1.Event, + success_callback: Callable[ + [WorkMessage], Awaitable[Union[HttpTopicEventResponse, TopicEventResponse]] + ], + failure_callback: Callable[ + [v1.Event, Exception, List[str]], + Awaitable[Union[HttpTopicEventResponse, TopicEventResponse]], + ], +): + logger = logging.getLogger(f"{__name__}.accept_or_fail_event_async") + try: + message = event_to_work_message(event) + logger.info(f"Received message: header={message.header}") + return await success_callback(message) + except Exception as e: + _, _, exc_traceback = sys.exc_info() + logger.exception(f"Failed to process event with id {event.id}") + try: + return await failure_callback(event, e, traceback.format_tb(exc_traceback)) + except Exception: + logger.error(f"Unable to parse traceparent. Discarding event with id {event.id}") + + ResponseType = get_type_hints(success_callback).get("return", HttpTopicEventResponse) + return ResponseType("drop") diff --git a/src/vibe_common/vibe_common/schemas.py b/src/vibe_common/vibe_common/schemas.py new file mode 100644 index 00000000..c3fa06b1 --- /dev/null +++ b/src/vibe_common/vibe_common/schemas.py @@ -0,0 +1,224 @@ +import os +from copy import deepcopy +from dataclasses import field, fields +from hashlib import sha256 +from itertools import chain +from typing import Any, ClassVar, Dict, List, Optional, Union, cast + +import yaml +from pydantic.dataclasses import dataclass +from pystac.item import Item +from typing_extensions import TypedDict # Required to avoid pydantic error + +from vibe_core.data.core_types import BaseVibe, TypeDictVibe, TypeParser +from vibe_core.datamodel import TaskDescription +from vibe_core.utils import rename_keys + +from .constants import CONTROL_PUBSUB_TOPIC + +ItemDict = Dict[str, Union[Item, List[Item]]] +CacheIdDict = Dict[str, Union[str, List[str]]] +OpDependencies = Dict[str, List[str]] +OpResolvedDependencies = Dict[str, Dict[str, Any]] + + +class EntryPointDict(TypedDict): + 
file: str + callback_builder: str + + +@dataclass +class OperationSpec: + name: str + root_folder: str + inputs_spec: TypeDictVibe + output_spec: TypeDictVibe + entrypoint: EntryPointDict + description: TaskDescription + dependencies: OpDependencies = field(default_factory=dict) + parameters: Dict[str, Any] = field(default_factory=dict) + default_parameters: Dict[str, Any] = field(default_factory=dict) + version: str = "1.0" + image_name: str = CONTROL_PUBSUB_TOPIC + + def __hash__(self): + return hash(self.name) + + +def update_parameters(parameters: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: + for k, v in override.items(): + if isinstance(v, dict): + parameters[k] = update_parameters(parameters.get(k, {}), cast(Dict[str, Any], v)) + else: + if k not in parameters: + raise ValueError(f"Tried to overwrite non-existent parameter {k}.") + parameters[k] = v + return parameters + + +class OperationParser: + required_fields: List[str] = "name inputs output parameters entrypoint".split() + default_version: str = "1.0" + + @classmethod + def parse( + cls, + op_definition_path: str, + parameters_override: Optional[Dict[str, Any]] = None, + ) -> OperationSpec: + op_config = cls._load_config(op_definition_path) + op_root_folder = os.path.dirname(op_definition_path) + + p = op_config.get("parameters", {}) + default_params: Dict[str, Any] = {} if p is None else p + + inputs = cls._parse_iospec(op_config["inputs"]) + output = cls._parse_iospec(op_config["output"]) + dependencies: OpDependencies = op_config.get("dependencies", {}) + version: str = op_config.get("version", cls.default_version) + version = str(version) if version is not None else version + + params = deepcopy(default_params) + if parameters_override is not None: + params = update_parameters(params, parameters_override) + + description = op_config.get("description", {}) + description = {} if description is None else description + description = rename_keys(description, {"output": "outputs"}) + 
description = TaskDescription(**description) + + return OperationSpec( + name=op_config["name"], + inputs_spec=inputs, + output_spec=output, + entrypoint=EntryPointDict( + file=op_config["entrypoint"]["file"], + callback_builder=op_config["entrypoint"]["callback_builder"], + ), + parameters=params, + default_parameters=default_params, + root_folder=op_root_folder, + dependencies=dependencies if dependencies is not None else {}, + version=version if version is not None else cls.default_version, + description=description, + ) + + @classmethod + def _parse_iospec(cls, iospec: Dict[str, str]) -> TypeDictVibe: + return TypeDictVibe({k: TypeParser.parse(v) for k, v in iospec.items()}) + + @staticmethod + def _load_config(path: str): + with open(path, "r") as stream: + data = yaml.safe_load(stream) + + for opfield in OperationParser.required_fields: + if opfield not in data: + raise ValueError(f"Operation config {path} is missing required field {opfield}") + + return data + + +@dataclass(frozen=True) +class OpRunId: + name: str + hash: str + + +class OpRunIdDict(TypedDict): + name: str + hash: str + + +@dataclass(init=False) +class CacheInfo: + name: str + version: str + hash: str = field(init=False) + parameters: OpResolvedDependencies = field(init=False) + ids: Dict[str, Union[str, List[str]]] = field(init=False) + + FIELD_TO_STORAGE: ClassVar[Dict[str, str]] = { + "version": "vibe_op_version", + "name": "vibe_op_name", + "hash": "vibe_op_hash", + "ids": "vibe_source_items", + "parameters": "vibe_op_parameters", + } + + def __init__( + self, + name: str, + version: str = "1.0", + sources: Optional[ItemDict] = None, + parameters: OpResolvedDependencies = {}, + **kwargs: Dict[str, Any], + ): + self.name = name + self.version = version.split(".")[0] + + if sources is not None: + kwargs["sources"] = sources + kwargs["parameters"] = self.parameters = parameters + + if "ids" not in kwargs: + if "sources" not in kwargs: + raise ValueError("CacheInfo missing both `ids` and 
`sources` fields.") + self.ids = self._populate_ids(cast(ItemDict, kwargs["sources"])) + else: + self.ids = kwargs["ids"] + + if "hash" in kwargs: + self.hash = cast(str, kwargs["hash"]) + else: + if "parameters" not in kwargs: + raise ValueError("CacheInfo missing required parameter `parameters`") + self.hash = sha256( + "".join( + [ + self._join_mapping(self.ids), + self._join_mapping(cast(OpResolvedDependencies, kwargs["parameters"])), + self.version, + ] + ).encode() + ).hexdigest() + + def as_storage_dict(self): + return { + self.FIELD_TO_STORAGE[f.name]: getattr(self, f.name) + for f in fields(self) # type: ignore + } + + @classmethod + def _compute_or_extract_id( + cls, thing: Union[Item, BaseVibe, List[Item], List[BaseVibe]] + ) -> Union[List[str], str]: + if isinstance(thing, list): + return [cast(str, cls._compute_or_extract_id(e)) for e in thing] + return thing.hash_id if hasattr(thing, "hash_id") else thing.id # type: ignore + + @classmethod + def _join(cls, thing: Union[Any, List[Any]]) -> str: + # TODO: this join might lead to collisions, but we're keeping it for now + # to avoid breaking existing caches + return "".join([str(i) for i in thing]) if isinstance(thing, list) else str(thing) + + @classmethod + def _join_mapping(cls, mapping: Union[CacheIdDict, OpResolvedDependencies]) -> str: + return "".join( + chain.from_iterable( + [ + (k, cls._join_mapping(v) if isinstance(v, dict) else cls._join(v)) + for k, v in sorted(mapping.items(), key=lambda e: e[0]) + ] + ) + ) + + @classmethod + def _populate_ids(cls, inputs: ItemDict) -> CacheIdDict: + return { + k: cast(List[str], sorted([cls._compute_or_extract_id(e) for e in v])) + if isinstance(v, list) + else cls._compute_or_extract_id(v) + for k, v in inputs.items() + } diff --git a/src/vibe_common/vibe_common/secret_provider.py b/src/vibe_common/vibe_common/secret_provider.py new file mode 100644 index 00000000..798d294c --- /dev/null +++ b/src/vibe_common/vibe_common/secret_provider.py @@ -0,0 +1,186 
class SecretProvider(ABC):
    """Abstract base for objects that resolve `@SECRET(<first>, <second>)` references.

    Subclasses implement `_resolve_impl`; callers use `resolve`, which only
    delegates to the subclass when the value is a string that starts with a
    secret reference and otherwise hands the value back untouched.
    """

    def __init__(self):
        logger_name = f"{__name__}.{self.__class__.__name__}"
        self.logger = logging.getLogger(logger_name)
        # Anchored at the start only: two comma-free groups separated by ", ".
        self.expression = re.compile(r"^@SECRET\(([^,]*?), ([^,]*?)\)")

    def is_secret(self, value: str) -> bool:
        """Tell whether `value` begins with a secret reference.

        Args:
            value: The string to inspect.

        Returns:
            True when `self.expression` matches at the start of `value`.
        """
        # A match object is always truthy, so bool() mirrors an `is not None` check.
        return bool(self.expression.match(value))

    @abstractmethod
    def _resolve_impl(self, value: Any) -> str:
        """Resolve a value already known to be a secret reference."""
        raise NotImplementedError

    def resolve(self, value: Any) -> str:
        """Resolve `value` if it is a secret reference.

        Args:
            value: Any value; only strings recognised by `is_secret` are resolved.

        Returns:
            The result of `_resolve_impl` for secret references, otherwise
            `value` itself, unchanged (whatever its type).
        """
        if isinstance(value, str) and self.is_secret(value):
            return self._resolve_impl(value)
        return value
class AzureSecretProvider(SecretProvider):
    """Secret provider that reads secret values from Azure Key Vault.

    For a secret reference, the first captured group is used as the key vault
    name and the second as the secret name.
    """

    def __init__(self, credential: Optional[TokenCredential] = None):
        """Create the provider.

        Args:
            credential: Credential used to reach Key Vault. When omitted, a
                `DefaultAzureCredential` is created on first use.
        """
        super().__init__()
        self.__credential = credential

    @property
    def credential(self):
        """Credential for Key Vault, created lazily when none was supplied."""
        if self.__credential is None:
            self.__credential = DefaultAzureCredential()

        return self.__credential

    def retrieve_from_keyvault(self, keyvault_name: str, secret_name: str) -> KeyVaultSecret:
        """Fetch a secret from the vault at `https://<keyvault_name>.vault.azure.net/`.

        Args:
            keyvault_name: Name of the key vault (not a URL).
            secret_name: Name of the secret to fetch.

        Returns:
            The `KeyVaultSecret` returned by the secret client.

        Raises:
            ValueError: If the secret is not found (`ResourceNotFoundError`) or the
                request could not be made (`ServiceRequestError`). The original
                SDK error is kept as the cause.
        """
        try:
            secret_client = SecretClient(
                vault_url=f"https://{keyvault_name}.vault.azure.net/", credential=self.credential
            )
            secret = secret_client.get_secret(secret_name)
        except ResourceNotFoundError as e:
            raise ValueError(
                f"Could not retrieve secret {secret_name}.\n Error message {str(e)}"
            ) from e
        except ServiceRequestError as e:
            raise ValueError(f"Invalid keyvault {keyvault_name}.\n Error message {str(e)}") from e

        return secret

    def _resolve_impl(self, value: Any) -> str:
        """Resolve a secret reference to the secret's value.

        Args:
            value: A string already matched by `self.expression`.

        Returns:
            The value of the referenced secret.

        Raises:
            ValueError: If the secret cannot be retrieved or has no value.
        """
        keyvault_name, secret_name = self.expression.findall(value)[0]
        secret = self.retrieve_from_keyvault(keyvault_name, secret_name)

        # Explicit check instead of `assert`: assertions vanish under `python -O`,
        # which would let `None` escape from a method declared to return `str`.
        if secret.value is None:
            raise ValueError(f"Secret {secret_name} in keyvault {keyvault_name} has no value.")

        return secret.value
+ + Returns: + The secret value + """ + logger = logging.getLogger(f"{__name__}.retrieve_dapr_secret") + with DaprClient() as dapr_client: + key = dapr_client.get_secret(store_name, secret_name).secret[key_name] + logger.info(f"Retrieving secret {secret_name} from store {store_name}") + return key + + +def retrieve_keyvault_secret( + keyvault_name: str, secret_name: str, cred: Optional[TokenCredential] = None +): + cred = cred or DefaultAzureCredential() + kv = SecretClient(keyvault_name, credential=cred) + key = kv.get_secret(secret_name).value + if key is None: + raise ValueError( + f"Could not find cosmos key with name {secret_name} on vault {keyvault_name}" + ) + return key + + +DaprSecretConfig = builds( + retrieve_dapr_secret, + populate_full_signature=True, + zen_dataclass={ + "module": "vibe_common.secret_provider", + "cls_name": "DaprSecretConfig", + }, +) + +KeyVaultSecretConfig = builds( + retrieve_keyvault_secret, + populate_full_signature=True, + zen_dataclass={ + "module": "vibe_common.secret_provider", + "cls_name": "KeyVaultSecretConfig", + }, +) + +SecretProviderConfig = builds( + SecretProvider, + populate_full_signature=True, + zen_dataclass={ + "module": "vibe_common.secret_provider", + "cls_name": "SecretProviderConfig", + }, +) + +DaprSecretProviderConfig = builds( + DaprSecretProvider, + populate_full_signature=True, + builds_bases=(SecretProviderConfig,), + zen_dataclass={ + "module": "vibe_common.secret_provider", + "cls_name": "DaprSecretProviderConfig", + }, +) + +AzureSecretProviderConfig = builds( + AzureSecretProvider, + populate_full_signature=True, + builds_bases=(SecretProviderConfig,), + zen_dataclass={ + "module": "vibe_common.secret_provider", + "cls_name": "AzureSecretProviderConfig", + }, +) diff --git a/src/vibe_common/vibe_common/statestore.py b/src/vibe_common/vibe_common/statestore.py new file mode 100644 index 00000000..21a1ddc3 --- /dev/null +++ b/src/vibe_common/vibe_common/statestore.py @@ -0,0 +1,121 @@ +#!/usr/bin/env 
class StateStore(StateStoreProtocol):
    """State store implementation that talks to Dapr through `VibeDaprClient`.

    Every request is scoped to `self.partition_key`, for reads as well as writes.
    """

    def __init__(
        self,
        state_store: str = STATE_STORE,
        partition_key: str = METADATA["partitionKey"],
    ):
        """Create the store.

        Args:
            state_store: Name of the state store, used to build request URLs.
            partition_key: Partition key sent as metadata with every request.
        """
        self.vibe_dapr_client = VibeDaprClient()
        self.state_store: str = state_store
        self.partition_key: str = partition_key
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    async def retrieve(self, key: str, traceparent: Optional[str] = None) -> Any:
        """Retrieve the value stored under `key`.

        Args:
            key: Key to look up.
            traceparent: Optional trace parent forwarded to the client.

        Returns:
            The decoded JSON payload of the response.

        Raises:
            KeyError: If the underlying client signals a missing key.
        """
        try:
            response = await self.vibe_dapr_client.get(
                STATE_URL_TEMPLATE.format(self.state_store, key),
                traceparent=traceparent,
                # Read from the same partition that `store`/`transaction` write to.
                params={"metadata.partitionKey": self.partition_key},
            )

            return await self.vibe_dapr_client.response_json(response)
        except KeyError as e:
            raise KeyError(f"Key {key} not found") from e

    async def retrieve_bulk(
        self, keys: List[str], parallelism: int = 8, traceparent: Optional[str] = None
    ) -> List[Any]:
        """Retrieves keys in bulk.

        This only exists because our UI needs to display details about all
        workflows, and retrieving in bulk saves on round trips to the state
        store.

        Args:
            keys: Keys to retrieve.
            parallelism: Value of the `parallelism` field sent in the bulk request.
            traceparent: Optional trace parent forwarded to the client.

        Returns:
            The `data` field of every returned state entry, in response order.

        Raises:
            KeyError: If the number of returned entries differs from the number
                of requested keys; the message lists the keys that are missing.
        """

        response = await self.vibe_dapr_client.post(
            url=STATE_URL_TEMPLATE.format(self.state_store, "bulk"),
            data={
                "keys": keys,
                "parallelism": parallelism,
            },
            traceparent=traceparent,
            params={"metadata.partitionKey": self.partition_key},
        )

        states = await self.vibe_dapr_client.response_json(response)

        if len(states) != len(keys):
            # Entries are mappings (see `state["data"]` below), so the key is
            # looked up by name; `discard` keeps this error path from raising
            # on an entry whose key was not requested.
            missing = set(keys)
            for state in states:
                missing.discard(state.get("key"))
            raise KeyError(f"Failed to retrieve keys {missing} from state store.")
        return [state["data"] for state in states]

    async def store(self, key: str, obj: Any, traceparent: Optional[str] = None) -> None:
        """Store `obj` under `key`.

        Args:
            key: Key to write.
            obj: Object to store; converted with the client's `obj_json`.
            traceparent: Optional trace parent forwarded to the client.
        """
        response = await self.vibe_dapr_client.post(
            STATE_URL_TEMPLATE.format(self.state_store, ""),
            data=[
                {
                    "key": key,
                    "value": self.vibe_dapr_client.obj_json(obj),
                    "metadata": {"partitionKey": self.partition_key},
                }
            ],
            traceparent=traceparent,
        )
        # Sanity check only: the client is expected to have raised already on failure.
        assert response.ok, "Failed to store state, but underlying method didn't capture it"

    async def transaction(
        self, operations: List[TransactionOperation], traceparent: Optional[str] = None
    ) -> None:
        """Submit several operations to the store's `transaction` endpoint.

        Args:
            operations: Operations to run; each provides `operation`, `key` and `value`.
            traceparent: Optional trace parent forwarded to the client.
        """
        queries = [
            {
                "operation": o["operation"],
                "request": {
                    "key": o["key"],
                    "value": self.vibe_dapr_client.obj_json(o["value"]),
                },
            }
            for o in operations
        ]
        await self.vibe_dapr_client.post(
            url=STATE_URL_TEMPLATE.format(self.state_store, "transaction"),
            data={
                "operations": queries,
                "metadata": {"partitionKey": self.partition_key},
            },
            traceparent=traceparent,
        )
+from opentelemetry.propagate import extract +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.trace.span import INVALID_SPAN + +from vibe_common.constants import TRACEPARENT_STRING + +LOGGER = logging.getLogger(__name__) + + +def setup_telemetry(service_name: str, exporter_endpoint: str): + resource = Resource(attributes={"service.name": service_name}) + provider = TracerProvider(resource=resource) + + # Create an OTLP exporter instance + # The insecure=True flag is used here because we're running the + # service locally (from the k8s cluster perspective) without + # Transport Layer Security (TLS). + otlp_exporter = OTLPSpanExporter(endpoint=exporter_endpoint, insecure=True) + + provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) + + # Sets the global default tracer provider + trace.set_tracer_provider(provider) + + +def get_current_trace_parent() -> str: + span = trace.get_current_span() + + if span == INVALID_SPAN: + LOGGER.warning("No current span found. 
Returning empty traceparent.") + + trace_id = span.get_span_context().trace_id + span_id = span.get_span_context().span_id + trace_flags = span.get_span_context().trace_flags + return TRACEPARENT_STRING.format(trace_id=trace_id, parent_id=span_id, trace_flags=trace_flags) + + +def add_span_attributes(attributes: Dict[str, Any]): + current_span = trace.get_current_span() + for k, v in attributes.items(): + current_span.set_attribute(k, v) + + +def update_telemetry_context(trace_parent: str): + """Updates the current telemetry context with the trace parent""" + attach(extract({"traceparent": trace_parent})) + + +def add_trace(func: Callable[..., Any]): + if inspect.iscoroutinefunction(func): + return _add_trace_async(func) + else: + return _add_trace_sync(func) + + +def _add_trace_sync(func: Callable[..., Any]): + @wraps(func) + def wrapper(*args, **kwargs): # type: ignore + tracer = trace.get_tracer(__name__) + with tracer.start_as_current_span(func.__name__): + return func(*args, **kwargs) + + return wrapper + + +def _add_trace_async(func: Callable[..., Any]): + @wraps(func) + async def wrapper(*args, **kwargs): # type: ignore + tracer = trace.get_tracer(__name__) + with tracer.start_as_current_span(func.__name__): + return await func(*args, **kwargs) + + return wrapper diff --git a/src/vibe_common/vibe_common/tokens.py b/src/vibe_common/vibe_common/tokens.py new file mode 100644 index 00000000..f4a6595b --- /dev/null +++ b/src/vibe_common/vibe_common/tokens.py @@ -0,0 +1,234 @@ +import logging +from abc import ABC, abstractmethod +from datetime import datetime, timedelta +from typing import Dict, Optional, Union, cast +from urllib.parse import urljoin, urlparse + +from azure.core.credentials import TokenCredential +from azure.identity import DefaultAzureCredential +from azure.storage.blob import ( + BlobClient, + BlobSasPermissions, + BlobServiceClient, + UserDelegationKey, + generate_blob_sas, +) + + +class StorageUserKey(ABC): + @abstractmethod + def 
is_valid(self) -> bool: + raise NotImplementedError("Subclass needs to implement this") + + @abstractmethod + def get_access_key(self) -> Union[UserDelegationKey, str]: + raise NotImplementedError("Subclass needs to implement this") + + +class StorageUserKeyCredentialed(StorageUserKey): + delegation_key: UserDelegationKey + key_expiration: Optional[datetime] + sas_expiration: timedelta + + def __init__( + self, + url: str, + sas_expiration: timedelta, + key_lease_time: timedelta, + credential: Optional[TokenCredential] = None, + ): + self.sas_expiration = sas_expiration + self.key_lease_time = key_lease_time + self.credential = DefaultAzureCredential() if credential is None else credential + self.storage_url = self._get_storage_url(url) + self.client = None + + # Update expiration and delegation keys + self._generate() + + def _get_storage_url(self, url: str) -> str: + return urlparse(url.rstrip("/")).netloc + + def _get_client(self): + if not self.client: + self.client = BlobServiceClient(self.storage_url, self.credential) + + return self.client + + def is_valid(self) -> bool: + if not self.key_expiration: + return False + return datetime.utcnow() + self.sas_expiration < self.key_expiration + + def _generate(self): + self.key_expiration = datetime.utcnow() + self.key_lease_time + client = self._get_client() + self.delegation_key = client.get_user_delegation_key(datetime.utcnow(), self.key_expiration) + + def get_access_key(self) -> Union[UserDelegationKey, str]: + if not self.is_valid(): + self._generate() + return self.delegation_key + + +class StorageUserKeyConnectionString(StorageUserKey): + def __init__( + self, + sas_expiration: timedelta, + key_lease_time: timedelta, + connection_string: str, + ): + self.connection_string = connection_string + self.client = None + + def _get_client(self): + if not self.client: + self.client = BlobServiceClient.from_connection_string(self.connection_string) + + return self.client + + def is_valid(self) -> bool: + return True 
+ + def get_access_key(self) -> Union[UserDelegationKey, str]: + client = self._get_client() + return client.credential.account_key + + +class BlobTokenManager(ABC): + sas_expiration_days: int + lease_time_multiplier: int + user_key_cache: Dict[str, StorageUserKey] = {} + + def __init__( + self, + sas_expiration_days: int = 1, + lease_time_ratio: int = 2, + ): + self.logger = logging.getLogger(self.__class__.__name__) + self.sas_expiration = timedelta(days=sas_expiration_days) + self.lease_time_ratio = lease_time_ratio + self.key_lease_time = self.lease_time_ratio * self.sas_expiration + + @abstractmethod + def _get_storage_user_key( + self, + url: str, + sas_expiration: timedelta, + key_lease_time: timedelta, + ) -> StorageUserKey: + raise NotImplementedError("Subclass needs to implement this") + + def _get_user_key(self, url: str, account_name: str) -> StorageUserKey: + if account_name not in self.user_key_cache: + self.logger.debug(f"Creating a new user key for account {account_name}") + storage_user_key = self._get_storage_user_key( + url, self.sas_expiration, self.key_lease_time + ) + + self.user_key_cache[account_name] = storage_user_key + + return self.user_key_cache[account_name] + + @abstractmethod + def _get_token(self, blob_client: BlobClient): + raise NotImplementedError("Subclass needs to implement this") + + def sign_url(self, url: str) -> str: + blob_client = BlobClient.from_blob_url(blob_url=url) + sas_token = self._get_token(blob_client) + return f"{urljoin(url, urlparse(url).path)}?{sas_token}" + + +class BlobTokenManagerCredentialed(BlobTokenManager): + def __init__( + self, + sas_expiration_days: int = 1, + lease_time_ratio: int = 2, + credential: Optional[TokenCredential] = None, + ): + super().__init__(sas_expiration_days, lease_time_ratio) + self.credential = DefaultAzureCredential() if credential is None else credential + + def _get_storage_user_key( + self, + url: str, + sas_expiration: timedelta, + key_lease_time: timedelta, + ) -> 
StorageUserKey: + return StorageUserKeyCredentialed( + url, + sas_expiration, + key_lease_time, + credential=self.credential, + ) + + def _get_token( + self, + blob_client: BlobClient, + ): + account_name: str = cast(str, blob_client.account_name) + container_name: str = blob_client.container_name + blob_name: str = blob_client.blob_name + + start = datetime.utcnow() + end = start + self.sas_expiration + user_delegation_key = cast( + UserDelegationKey, self._get_user_key(blob_client.url, account_name).get_access_key() + ) + + sas_token = generate_blob_sas( + account_name=account_name, + container_name=container_name, + user_delegation_key=user_delegation_key, + blob_name=blob_name, + permission=BlobSasPermissions(read=True), + start=start, + expiry=end, + ) + return sas_token + + +class BlobTokenManagerConnectionString(BlobTokenManager): + def __init__( + self, + connection_string: str, + sas_expiration_days: int = 1, + lease_time_ratio: int = 2, + ): + super().__init__(sas_expiration_days, lease_time_ratio) + self.connection_string = connection_string + + def _get_storage_user_key( + self, + url: str, + sas_expiration: timedelta, + key_lease_time: timedelta, + ) -> StorageUserKey: + return StorageUserKeyConnectionString( + sas_expiration, + key_lease_time, + self.connection_string, + ) + + def _get_token( + self, + blob_client: BlobClient, + ): + account_name: str = cast(str, blob_client.account_name) + container_name: str = blob_client.container_name + blob_name: str = blob_client.blob_name + + start = datetime.utcnow() + end = start + self.sas_expiration + account_key = cast(str, self._get_user_key(blob_client.url, account_name).get_access_key()) + sas_token = generate_blob_sas( + account_name=account_name, + container_name=container_name, + account_key=account_key, + blob_name=blob_name, + permission=BlobSasPermissions(read=True), + start=start, + expiry=end, + ) + return sas_token diff --git a/src/vibe_common/vibe_common/vibe_dapr_client.py 
b/src/vibe_common/vibe_common/vibe_dapr_client.py new file mode 100644 index 00000000..66551f83 --- /dev/null +++ b/src/vibe_common/vibe_common/vibe_dapr_client.py @@ -0,0 +1,157 @@ +import json +import logging +from functools import partial +from typing import Any, Mapping, Optional + +from aiohttp import ClientResponse, ClientSession +from aiohttp_retry import ExponentialRetry, RetryClient + +from vibe_common.constants import TRACEPARENT_HEADER_KEY +from vibe_common.dapr import handle_aiohttp_timeout, process_dapr_response +from vibe_core.data.json_converter import dump_to_json + +MAX_SESSION_ATTEMPTS = 10 +MAX_TIMEOUT_S = 30 +MAX_DIRECT_INVOKE_TRIES = 3 + +METADATA = {"partitionKey": "eywa"} + +""" +This is an implementation of a Dapr HTTP client that currently support Dapr service invocation +and state management through HTTP. +""" + + +class VibeDaprClient: + def __init__(self): + self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") + + def _build_client(self) -> RetryClient: + session = ClientSession() + retry_options = ExponentialRetry( + attempts=MAX_SESSION_ATTEMPTS, + max_timeout=MAX_TIMEOUT_S, + statuses={400, 500, 502, 503, 504}, + ) + retry_client = RetryClient(client_session=session, retry_options=retry_options) + return retry_client + + async def get( + self, + url: str, + traceparent: Optional[str], + params: Optional[Mapping[str, str]] = None, + ) -> ClientResponse: + async with self._build_client() as session: + try: + response = await session.get( + url, headers={"traceparent": traceparent} if traceparent else {}, params=params + ) + await handle_aiohttp_timeout(response) + return await process_dapr_response(response) + except KeyError: + raise + except Exception: + self.logger.exception(f"Failed to process request for {url}") + raise RuntimeError(f"dapr failed to process request for {url}") + + async def post( + self, + url: str, + data: Any, + traceparent: Optional[str], + params: Optional[Mapping[str, str]] = None, + ) -> 
ClientResponse: + if url.endswith("/"): + url = url[:-1] + + tries: int = 0 + + while True: + async with self._build_client() as session: + try: + headers = {"Content-Type": "application/json"} + if traceparent: + headers[TRACEPARENT_HEADER_KEY] = traceparent + response = await session.post( + url, + data=self._dumps(data), + headers=headers, + params=params, + ) + await handle_aiohttp_timeout(response) + return await process_dapr_response(response) + except RuntimeError as e: + if "ERR_DIRECT_INVOKE" in str(e): + tries += 1 + self.logger.warning( + f"ERR_DIRECT_INVOKE raised by Dapr, " + f"retrying ({tries}/{MAX_DIRECT_INVOKE_TRIES})" + ) + if tries >= MAX_DIRECT_INVOKE_TRIES: + self.logger.exception(f"Failed to process request for {url}") + raise + except Exception: + self.logger.exception(f"Failed to process request for {url}") + raise RuntimeError(f"dapr failed to process request for {url}") + + def obj_json(self, obj: Any, **kwargs: Any) -> Any: + """JSON representation of object `obj` encoding floats as strings. + + Unfortunately Dapr's JSON deserializer clips floating point precision + so floats are encoded as strings + + Args: + obj: the object to be converted + kwargs: optional keyword arguments passed to `_dumps` + + Returns: + Object `obj` represented as JSON + """ + return json.loads(self._dumps(obj, **kwargs), parse_float=lambda f_as_s: f_as_s) + + async def response_json(self, response: ClientResponse) -> Any: + """Loads a JSON from a `ClientResponse`. + + Because floats are encoded as strings before being sent to Dapr due to the truncation that + occurs in the Dapr sidecar when using its HTTP API, this method decodes any string that + can be parsed as a float into a Python float. 
+ + Args: + response: The `ClientResponse` object with our data + + Returns: + The JSON of our response, with floats correctly decoded as floats + """ + return await response.json(loads=partial(json.loads, object_hook=_decode)) + + def _dumps(self, obj: Any, **kwargs: Any) -> str: + return dump_to_json(obj, **kwargs) + + +def _decode(obj: Any) -> Any: + """Returns the given decoded JSON object with all string values that can be parsed as floats as + Python floats. + + This function covers all possible valid JSON objects as valid JSON values are strings, objects + (Python dict), arrays (Python list), numbers (Python int/float), or the literals true (Python + True), false (Python False), or null (Python None)): + https://www.rfc-editor.org/rfc/rfc8259#section-3 + + Args: + obj: A decoded JSON object + + Returns: + The same decoded JSON object with all string values that can be parsed as floats as floats + """ + if isinstance(obj, str): + try: + return float(obj) + except ValueError: + return obj + elif isinstance(obj, dict): + return {k: _decode(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [_decode(v) for v in obj] + else: + return obj diff --git a/src/vibe_common/vibe_common/workflow/__init__.py b/src/vibe_common/vibe_common/workflow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/vibe_core/pyproject.toml b/src/vibe_core/pyproject.toml index cdad7708..9b588886 100644 --- a/src/vibe_core/pyproject.toml +++ b/src/vibe_core/pyproject.toml @@ -18,9 +18,9 @@ vibe_core = ["terraform/*.tf"] [project] name = "vibe-core" -version ="2024.05.27" +version = "0.0.1" authors = [ - { name="Microsoft FarmVibes.AI Team", email="eywa-devs@microsoft.com" }, + { name="Microsoft FarmVibes.AI Team", email="terravibes@microsoft.com" }, ] description = "FarmVibes.AI Geospatial Platform Package - vibe core package." 
license = {text = "MIT"} @@ -44,7 +44,7 @@ dependencies = [ "pydantic~=1.10.0", "strenum~=0.4.7", "shapely>=1.7.1", - "requests~=2.31.0", + "requests~=2.32.0", "pystac~=1.6.0", "hydra-zen~=0.10", "rich~=13.7.1", @@ -57,3 +57,8 @@ dependencies = [ [project.scripts] farmvibes-ai = "vibe_core.cli.main:main" + +[project.optional-dependencies] +test = [ + "orjson~=3.9.15", +] \ No newline at end of file diff --git a/src/vibe_core/tests/test_stac_converter.py b/src/vibe_core/tests/test_stac_converter.py new file mode 100644 index 00000000..23224363 --- /dev/null +++ b/src/vibe_core/tests/test_stac_converter.py @@ -0,0 +1,127 @@ +# pyright: reportUnknownMemberType=false + +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import pytest +from shapely import geometry as shpg +from shapely.geometry.base import BaseGeometry + +from vibe_core.data import AssetVibe, DataVibe, Raster, StacConverter + + +@dataclass +class DateVibe(DataVibe): + date_field: datetime + int_field: int + str_field: str + other_field: Any + other_list: List[str] + date_list: List[datetime] + date_dict: Dict[str, datetime] + date_tuple: Tuple[datetime, datetime, datetime] + mixed_tuple: Tuple[int, datetime] + var_tuple: Tuple[datetime, ...] 
+ nested_list: List[List[datetime]] + dict_list: Dict[str, List[datetime]] + super_nest: Dict[Any, List[Dict[Any, Dict[Any, Tuple[datetime, ...]]]]] + super_nest_no: Dict[Any, List[Dict[Any, Dict[Any, Tuple[Any, ...]]]]] + + +@dataclass +class ShapeVibe(DataVibe): + shape: BaseGeometry + shape_dict: Dict[str, BaseGeometry] + + +@pytest.fixture +def converter() -> StacConverter: + return StacConverter() + + +def test_conversion_roundtrip(converter: StacConverter, tmp_path: Path): + asset_path = tmp_path.as_posix() + now = datetime.now() + geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) + terravibes_data = DataVibe(id="assetless", time_range=(now, now), geometry=geom, assets=[]) + # Assetless DataVibe conversion + assert converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data + mimefull = AssetVibe(reference=asset_path, type="image/tiff", id="mimefull") + terravibes_data.assets.append(mimefull) + # Conversion with asset that has mimetype + assert converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data + mimeless = AssetVibe(reference=asset_path, type=None, id="mimeless") + # Conversion with asset that has no mimetype + terravibes_data.assets.append(mimeless) + assert converter.from_stac_item(converter.to_stac_item(terravibes_data)) == terravibes_data + + +def test_conversion_raster(converter: StacConverter, tmp_path: Path): + asset_path = tmp_path.as_posix() + now = datetime.now() + geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) + tiff_asset = AssetVibe(reference=asset_path, type="image/tiff", id="tiff_asset") + json_asset = AssetVibe(reference=asset_path, type="application/json", id="json_asset") + raster = Raster( + id="extra_info_test", + time_range=(now, now), + geometry=geom, + assets=[tiff_asset, json_asset], + bands={"B1": 0, "B2": 1, "B3": 2}, + ) + converted = converter.from_stac_item(converter.to_stac_item(raster)) + assert isinstance(converted, Raster) + assert converted == 
raster + assert raster.raster_asset == converted.raster_asset + assert raster.visualization_asset == converted.visualization_asset + + +def test_datetime_field_serialization(converter: StacConverter): + now = datetime.now() + geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) + test_vibe = DateVibe( + "assetless", + (now, now), + geom, + [], + now, + 1, + "1", + None, + ["1", "2"], + [datetime.now() for _ in range(2)], + {f"{i}": datetime.now() for i in range(3)}, + (datetime.now(), datetime.now(), datetime.now()), + (1, datetime.now()), + tuple(datetime.now() for _ in range(4)), + [[datetime.now()]], + {"1": [datetime.now() for _ in range(2)], "2": [datetime.now() for _ in range(3)]}, + {0: [{0: {0: (datetime.now(),)}}]}, + {0: [{0: {0: ("NO",)}}]}, + ) + forward = converter.to_stac_item(test_vibe) + assert forward.properties["date_field"] == now.isoformat() + round_trip = converter.from_stac_item(forward) + assert test_vibe == round_trip + + +def test_geom_field_serialization(converter: StacConverter): + now = datetime.now() + geom: Dict[str, Any] = shpg.mapping(shpg.box(-1, -1, 1, 1)) + test_vibe = ShapeVibe( + "assetless", + (now, now), + geom, + [], + shpg.box(0, 0, 2, 2), + {f"{i}": shpg.box(0, 0, i, i) for i in range(1, 5)}, + ) + forward = converter.to_stac_item(test_vibe) + assert forward.properties["shape"] == { + "type": "Polygon", + "coordinates": (((2.0, 0.0), (2.0, 2.0), (0.0, 2.0), (0.0, 0.0), (2.0, 0.0)),), + } + round_trip = converter.from_stac_item(forward) + assert test_vibe == round_trip diff --git a/src/vibe_core/tests/test_type_serialization.py b/src/vibe_core/tests/test_type_serialization.py new file mode 100644 index 00000000..c37290fd --- /dev/null +++ b/src/vibe_core/tests/test_type_serialization.py @@ -0,0 +1,98 @@ +import inspect +import typing +from datetime import datetime +from unittest.mock import MagicMock, patch + +import orjson +import pytest + +import vibe_core.data +from vibe_core.data.utils import StacConverter, 
def is_optional(t: type) -> bool:
    """Tell whether `t` is a union type that admits `None`.

    Both spellings of a union are recognised: `typing.Union[...]` /
    `typing.Optional[...]` and, on interpreters that provide
    `types.UnionType`, the `X | None` form.

    Args:
        t: The type annotation to inspect.

    Returns:
        True when `t` is a union with `NoneType` among its members, False otherwise.
    """
    import types

    # `types.UnionType` does not exist on older interpreters; fall back to
    # `typing.Union` so the membership test degrades to the original check.
    union_origins = (typing.Union, getattr(types, "UnionType", typing.Union))
    return typing.get_origin(t) in union_origins and type(None) in typing.get_args(t)
complex types + return create_mock_instance(tp) + + raise NotImplementedError(f"Mocking not implemented for type: {tp}") + + +@patch.object(vibe_core.data.HansenProduct, "validate_url", return_value=True) +@pytest.mark.parametrize("cls", FARMVIBES_DATA_CLASSES) +def test_serialization_deserialization( + _: MagicMock, + cls: type, +): + converter = StacConverter() + + mock_instance = create_mock_instance(cls) + stac_item = converter.to_stac_item(mock_instance) + + json_instance = orjson.loads(orjson.dumps(serialize_stac(stac_item))) + deserialized_stac_item = deserialize_stac(json_instance) + deserialized = converter.from_stac_item(deserialized_stac_item) + assert mock_instance == deserialized + + deserialized = converter.from_stac_item(stac_item) + assert mock_instance == deserialized diff --git a/src/vibe_core/vibe_core/cli/constants.py b/src/vibe_core/vibe_core/cli/constants.py index 39e49f34..be6bdb8c 100644 --- a/src/vibe_core/vibe_core/cli/constants.py +++ b/src/vibe_core/vibe_core/cli/constants.py @@ -1,5 +1,5 @@ DEFAULT_IMAGE_PREFIX = "farmai/terravibes/" -DEFAULT_IMAGE_TAG = "2024.05.27" +DEFAULT_IMAGE_TAG = "dev" DEFAULT_REGISTRY_PATH = "mcr.microsoft.com" LOCAL_SERVICE_URL_PATH_FILE = "service_url" diff --git a/src/vibe_core/vibe_core/cli/local.py b/src/vibe_core/vibe_core/cli/local.py index 1f607653..85fb0c30 100644 --- a/src/vibe_core/vibe_core/cli/local.py +++ b/src/vibe_core/vibe_core/cli/local.py @@ -13,7 +13,7 @@ LOCAL_SERVICE_URL_PATH_FILE, ONNX_SUBDIR, ) -from vibe_core.cli.helper import log_should_be_logged_in, verify_to_proceed +from vibe_core.cli.helper import verify_to_proceed from vibe_core.cli.logging import log from vibe_core.cli.osartifacts import InstallType, OSArtifacts from vibe_core.cli.wrappers import ( @@ -292,25 +292,28 @@ def setup( k3d.os_artifacts.check_dependencies(InstallType.ALL) az = AzureCliWrapper(k3d.os_artifacts, "") log( - f"Username and password not provided for {registry}, inferring from Azure CLI", + f"Username and 
password not provided for {registry}, requesting from Azure CLI", level="warning", ) + password = az.request_registry_token(registry) - try: - az.get_subscription_info() # Needed for confirming subscription - except Exception as e: - log_should_be_logged_in(e) - return False - - username, password = az.infer_registry_credentials(registry) - - if username and password: + if password: log(f"Creating Docker credentials for registry {registry}") try: kubectl.delete_secret("acrtoken") except Exception: pass + if not username: + username = "00000000-0000-0000-0000-000000000000" kubectl.create_docker_token("acrtoken", registry, username, password) + else: + if registry.endswith(AZURE_CR_DOMAIN): + log( + "No registry username and password were provided, and I was unable to " + "get an ACR token. Aborting installation.", + level="error", + ) + return False if not worker_replicas: log( diff --git a/src/vibe_core/vibe_core/cli/remote.py b/src/vibe_core/vibe_core/cli/remote.py index 8318ce5a..4b6644d5 100644 --- a/src/vibe_core/vibe_core/cli/remote.py +++ b/src/vibe_core/vibe_core/cli/remote.py @@ -191,11 +191,10 @@ def setup_or_upgrade( if registry_path and registry_path.endswith(AZURE_CR_DOMAIN): if not registry_username or not registry_password: + try: - ( - registry_username, - registry_password, - ) = az.infer_registry_credentials(registry_path) + registry_username = "00000000-0000-0000-0000-000000000000" + registry_password = az.request_registry_token(registry_path) except Exception: log( f"Couldn't infer registry credentials for {registry_path}. " diff --git a/src/vibe_core/vibe_core/cli/wrappers.py b/src/vibe_core/vibe_core/cli/wrappers.py index 89f21a2b..6c82722a 100644 --- a/src/vibe_core/vibe_core/cli/wrappers.py +++ b/src/vibe_core/vibe_core/cli/wrappers.py @@ -951,35 +951,27 @@ def verify_enough_cores_available( if required > available: raise ValueError(f"{cpu_type} has {available} CPUs. 
We need {required}.") - def infer_registry_credentials(self, registry: str) -> Tuple[str, str]: - log(f"Inferring credentials for {registry}") + def request_registry_token(self, registry: str) -> str: + """Requests an access token for a given registry using the az CLI. + + Args: + registry: the name of the registry under Azure we want to connect to. + """ + log(f"Getting token credentials for {registry}") registry = registry.replace(".azurecr.io", "") # FIXME: This only works for Azure Public self.refresh_az_creds() - username_command = [ + token_command = [ self.os_artifacts.az, "acr", - "credential", - "show", + "login", "-n", registry, - "--query", - "username", + "--expose-token", ] - password_command = [ - self.os_artifacts.az, - "acr", - "credential", - "show", - "-n", - registry, - "--query", - "passwords[0].value", - ] - error = f"Unable to infer credentials for {registry}" - username = json.loads(execute_cmd(username_command, True, True, error, censor_output=True)) - password = json.loads(execute_cmd(password_command, True, True, error, censor_output=True)) - return username, password + error = f"Unable to get credentials for {registry}" + output = json.loads(execute_cmd(token_command, True, True, error, censor_output=True)) + return output["accessToken"] if "accessToken" in output else "" def get_storage_account_list(self): cmd = [ @@ -1389,16 +1381,24 @@ def get_secret(self, name: str, key: str, cluster_name: str = ""): ) return json.loads(result) - def create_docker_token(self, token: str, registry: str, username: str, password: str): + def create_docker_token(self, token_name: str, registry: str, username: str, token: str): + """Add a secret to the kubernetes cluster. + + Args: + token_name: The name of the token to be added to the cluster + registry: The (Azure Container) registry this token is for + username: The user name to use to connect to the registry + token: The token to use. 
+ """ cmd = [ self.os_artifacts.kubectl, "create", "secret", "docker-registry", - token, + token_name, f"--docker-server={registry}", f"--docker-username={username}", - f"--docker-password={password}", + f"--docker-password={token}", f"--docker-email={username}", ] execute_cmd( diff --git a/src/vibe_core/vibe_core/client.py b/src/vibe_core/vibe_core/client.py index ec33bd3b..56ac95b1 100644 --- a/src/vibe_core/vibe_core/client.py +++ b/src/vibe_core/vibe_core/client.py @@ -829,7 +829,7 @@ def _block_until_status( status_options = " or ".join(block_until_statuses) raise RuntimeError( f"Timeout of {timeout_s}s reached while waiting for the workflow to have a " - f"status of {status_options}." + f"status of {status_options}. Workflow is currently in status {self.status}." ) return self diff --git a/src/vibe_core/vibe_core/data/utils.py b/src/vibe_core/vibe_core/data/utils.py index b6453e5f..a4423853 100644 --- a/src/vibe_core/vibe_core/data/utils.py +++ b/src/vibe_core/vibe_core/data/utils.py @@ -145,18 +145,33 @@ def convert_field( The converted field value. 
""" t_origin = get_origin(field_type) - t_args = get_args(field_type) - if t_origin is list and len(t_args) == 1: - return [self.convert_field(f, t_args[0], converter) for f in field_value] - if t_origin is dict and t_args: - return {k: self.convert_field(v, t_args[1], converter) for k, v in field_value.items()} - if t_origin is tuple and t_args: - if len(t_args) == 2 and t_args[1] == ...: - return tuple(self.convert_field(f, t_args[0], converter) for f in field_value) - return tuple( - self.convert_field(f, ta, converter) if ta is datetime else f - for f, ta in zip(field_value, t_args) - ) + if t_origin: + t_args = get_args(field_type) + if t_origin is list and len(t_args) == 1: + return [self.convert_field(f, t_args[0], converter) for f in field_value] + if t_origin is dict and t_args: + return { + k: self.convert_field(v, t_args[1], converter) for k, v in field_value.items() + } + if t_origin is tuple and t_args: + if len(t_args) == 2 and t_args[1] == ...: + return tuple(self.convert_field(f, t_args[0], converter) for f in field_value) + return tuple( + self.convert_field(f, ta, converter) if ta is datetime else f + for f, ta in zip(field_value, t_args) + ) + else: + for t in field_type.mro(): + if t in self.field_converters: + return converter(field_value, t) + elif t is list: + return [self.convert_field(f, type(f), converter) for f in field_value] + elif t is dict: + return { + k: self.convert_field(v, type(v), converter) for k, v in field_value.items() + } + elif t is tuple: + return tuple(self.convert_field(f, type(f), converter) for f in field_value) return converter(field_value, field_type) def serialize_fields( @@ -491,7 +506,7 @@ def get_base_type(vibetype: DataVibeType) -> Type[BaseVibe]: if not (is_container_type(vibetype) or isinstance(vibetype, type)): raise ValueError(f"Argument {vibetype} is not a type") if isinstance(vibetype, type): - return cast(Type[T], vibetype) + return cast(Type[T], vibetype) # type: ignore levels = 1 tmp = 
get_args(vibetype) while tmp is not None and is_container_type(tmp[0]): diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf index d11b0fea..878098b8 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf @@ -18,7 +18,7 @@ resource "azurerm_storage_account" "storageaccount" { network_rules, ] } - + } resource "azurerm_storage_container" "userfiles" { diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf new file mode 100644 index 00000000..b6d2953c --- /dev/null +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf @@ -0,0 +1,194 @@ +resource "kubernetes_config_map" "otel" { + count = var.enable_telemetry ? 1 : 0 + metadata { + name = "otel-collector-config" + labels = { + app = "opentelemetry" + component = "otel-collector-conf" + } + } + + data = { + "otel-collector-config.yaml" = < WorkflowCallback: + async def callback(change: WorkflowChange, **kwargs: Any): + if change == WorkflowChange.WORKFLOW_STARTED: + self._status = RunStatus.running + elif change == WorkflowChange.WORKFLOW_FINISHED: + self._status = RunStatus.done + elif change == WorkflowChange.WORKFLOW_FAILED: + self._status = RunStatus.failed + self._reason = kwargs["reason"] + + return callback + + @property + def status(self) -> str: + if self._status == RunStatus.failed: + return f"{self._status}: {self._reason}" + return self._status + + @property + def output(self) -> BaseVibeDict: + return self._output + + +class SubprocessClient(Client): + """ + LocalWorkflowRunner wrapper that runs the workflow and retrieves results as DataVibe. 
+ """ + + def __init__( + self, + factory_spec: OperationFactoryConfig, + raise_exception: bool, + ): + self.factory_spec = factory_spec + self.converter = StacConverter() + self.storage: Storage = instantiate(factory_spec.storage) + self.raise_exception = raise_exception + + def _deserialize_to_datavibe(self, workflow_output: OpIOType) -> BaseVibeDict: + stac_items = {k: deserialize_stac(v) for k, v in workflow_output.items()} + retrieved = self.storage.retrieve(stac_items) + vibe_data = {k: self.converter.from_stac_item(v) for k, v in retrieved.items()} + return vibe_data + + async def run( + self, workflow_name: str, geometry: BaseGeometry, time_range: Tuple[datetime, datetime] + ) -> WorkflowRun: + output = SubprocessWorkflowRun() + callback = output._workflow_callback() + if workflow_name in self.list_workflows(): + # Load workflow by it's name + workflow = load_workflow_by_name(workflow_name) + else: + # Assume it's the path to a YAML file + workflow = Workflow.build(workflow_name) + + runner = LocalWorkflowRunner.build( + workflow, + io_mapper=WorkflowIOHandler(workflow), + factory_spec=self.factory_spec, + update_state_callback=callback, + ) + + stac_item_dict = gen_stac_item_from_bounds( + shpg.mapping(geometry), # type: ignore + *time_range, + ) + input_spec = cast(OpIOType, {k: stac_item_dict for k in runner.workflow.inputs_spec}) + try: + runner_output = await runner.run(input_spec) + output._output = self._deserialize_to_datavibe(runner_output) + await callback(WorkflowChange.WORKFLOW_FINISHED) + except Exception as e: + await callback(WorkflowChange.WORKFLOW_FAILED, reason=str(e)) + if self.raise_exception: + raise + return output + + def list_workflows(self) -> List[str]: + return list_workflows() + + +def get_default_subprocess_client(cache_dir: str) -> SubprocessClient: + tmp_asset_path = os.path.join(cache_dir, "assets") + storage_config = LocalStorageConfig( + local_path=cache_dir, asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) + ) + 
factory_spec = OperationFactoryConfig(storage_config, AzureSecretProviderConfig()) + return SubprocessClient(factory_spec, False) diff --git a/src/vibe_dev/vibe_dev/local_runner.py b/src/vibe_dev/vibe_dev/local_runner.py new file mode 100644 index 00000000..5951219d --- /dev/null +++ b/src/vibe_dev/vibe_dev/local_runner.py @@ -0,0 +1,75 @@ +from typing import cast +from uuid import UUID + +from vibe_agent.ops import OperationDependencyResolver, OperationFactoryConfig, OpIOType +from vibe_agent.ops_helper import OpIOConverter +from vibe_agent.worker import Worker +from vibe_common.messaging import ( + CacheInfoExecuteRequestContent, + ExecuteRequestMessage, + WorkMessageBuilder, +) +from vibe_common.schemas import CacheInfo +from vibe_server.workflow.runner.runner import ( + NoOpStateChange, + WorkflowCallback, + WorkflowChange, + WorkflowRunner, +) +from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler +from vibe_server.workflow.workflow import GraphNodeType, Workflow + +MAX_OP_EXECUTION_TIME_S = 60 * 60 * 3 + + +class LocalWorkflowRunner(WorkflowRunner): + timeout_s: float = 1 # in seconds + + def __init__( + self, + workflow: Workflow, + io_mapper: WorkflowIOHandler, + factory_spec: OperationFactoryConfig, + update_state_callback: WorkflowCallback = NoOpStateChange, + max_tries: int = 1, + ): + super().__init__(workflow, io_mapper, update_state_callback) + self.runner = Worker( + termination_grace_period_s=int(self.timeout_s), + control_topic="", + max_tries=max_tries, + factory_spec=factory_spec, + ) + + self.dependency_resolver = OperationDependencyResolver() + + async def _run_op_impl( + self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int + ) -> OpIOType: + try: + message = WorkMessageBuilder.build_execute_request(run_id, "", op.spec, input) + self.runner.current_message = message + stac = OpIOConverter.deserialize_input(input) + dependencies = self.dependency_resolver.resolve(op.spec) + message = 
WorkMessageBuilder.add_cache_info_to_execute_request( + cast(ExecuteRequestMessage, message), + CacheInfo(op.spec.name, op.spec.version, stac, dependencies), + ) + content = message.content + assert isinstance(content, CacheInfoExecuteRequestContent) + await self._report_state_change( + WorkflowChange.SUBTASK_RUNNING, task=op.name, subtask_idx=subtask_idx + ) + out = self.runner.run_op_with_retry(content, run_id, MAX_OP_EXECUTION_TIME_S) + await self._report_state_change( + WorkflowChange.SUBTASK_FINISHED, task=op.name, subtask_idx=subtask_idx + ) + return out + except Exception as e: + self.logger.exception(f"Failed to run operation {op.name}") + await self._report_state_change( + WorkflowChange.SUBTASK_FAILED, task=op.name, subtask_idx=subtask_idx, reason=str(e) + ) + raise + finally: + self.runner.current_message = None diff --git a/src/vibe_dev/vibe_dev/mock_utils.py b/src/vibe_dev/vibe_dev/mock_utils.py new file mode 100644 index 00000000..436dc6ca --- /dev/null +++ b/src/vibe_dev/vibe_dev/mock_utils.py @@ -0,0 +1,18 @@ +from typing import Dict + +from pydantic import BaseModel + + +class Request(BaseModel): + """Mock Request class for testing purposes.""" + + text: str + """Represents the response of the request.""" + + def raise_for_status(self) -> Dict[str, int]: + """Mock raise_for_status method. + + return: A dictionary with a success code. 
+ """ + + return {"success": 200} diff --git a/src/vibe_dev/vibe_dev/testing/__init__.py b/src/vibe_dev/vibe_dev/testing/__init__.py new file mode 100644 index 00000000..af7e4799 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/__init__.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.fixture +def anyio_backend(): + return "asyncio" diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml new file mode 100644 index 00000000..d3f48d83 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_base.yaml @@ -0,0 +1,9 @@ +name: base_base +inputs: + user_data: SimpleStrDataType +output: + processed_data: SimpleStrDataType +parameters: +entrypoint: + file: base_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py new file mode 100644 index 00000000..e8eae5d4 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py @@ -0,0 +1,14 @@ +from dataclasses import asdict +from typing import Any, List, Union + +from vibe_core.data.core_types import BaseVibe + + +def callback(user_data: Union[BaseVibe, List[BaseVibe]]): + if isinstance(user_data, list): + return {"processed_data": [d.__class__(**asdict(d)) for d in user_data]} + return {"processed_data": user_data.__class__(**asdict(user_data))} + + +def callback_builder(**kw: Any): # type: ignore + return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml new file mode 100644 index 00000000..3bf3a395 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_inheritance.yaml @@ -0,0 +1,9 @@ +name: item_inheritance +inputs: + user_data: DataVibe +output: + processed_data: "@INHERIT(user_data)" +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No 
newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml new file mode 100644 index 00000000..e861cccb --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_item.yaml @@ -0,0 +1,9 @@ +name: item_item +inputs: + user_data: DataVibe +output: + processed_data: DataVibe +parameters: +entrypoint: + file: vibe_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml new file mode 100644 index 00000000..80642036 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/item_list.yaml @@ -0,0 +1,10 @@ +name: item_list +inputs: + user_data: DataVibe +output: + processed_data: List[DataVibe] +parameters: + num_items: 1 +entrypoint: + file: to_list_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml new file mode 100644 index 00000000..f7463cea --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_and_item_inputs.yaml @@ -0,0 +1,10 @@ +name: list_and_item_inputs +inputs: + list_data: List[DataVibe] + item_data: DataVibe +output: + processed_data: DataVibe +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml new file mode 100644 index 00000000..636515e9 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_inheritance.yaml @@ -0,0 +1,9 @@ +name: list_inheritance +inputs: + user_data: List[DataVibe] +output: + processed_data: "@INHERIT(user_data)" +parameters: +entrypoint: + file: op.py + callback_builder: 
callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml new file mode 100644 index 00000000..3f51dfa0 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_item.yaml @@ -0,0 +1,9 @@ +name: list_item +inputs: + user_data: List[DataVibe] +output: + processed_data: DataVibe +parameters: +entrypoint: + file: to_item_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml new file mode 100644 index 00000000..c07efbf7 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/list_list.yaml @@ -0,0 +1,9 @@ +name: list_list +inputs: + user_data: List[DataVibe] +output: + processed_data: List[DataVibe] +parameters: +entrypoint: + file: vibe_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml new file mode 100644 index 00000000..0bad0196 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/missing_inheritance.yaml @@ -0,0 +1,9 @@ +name: missing_inheritance +inputs: + user_data: DataVibe +output: + processed_data: "@INHERIT(whatever)" +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml new file mode 100644 index 00000000..3876b511 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/nested_parameters.yaml @@ -0,0 +1,19 @@ +name: nested_parameters +inputs: + user_data: DataVibe +output: + processed_data: DataVibe +parameters: + overwrite: kept + nested: + overwrite: kept nested + keep: kept nested 
+entrypoint: + file: vibe_op.py + callback_builder: callback_builder +description: + parameters: + overwrite: param named overwrite + nested: + overwrite: nested overwrite + keep: nested keep diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py new file mode 100644 index 00000000..cb20bb81 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py @@ -0,0 +1,31 @@ +from datetime import datetime +from typing import Any + +from vibe_core.data import DataVibe + + +def print_args(user_data: Any): + try: + now = datetime.now() + user_data.data = "Processed " + user_data.data + print(user_data.data) + return { + "processed_data": [ + DataVibe( + user_data.data, + (now, now), + { + "type": "Point", + "coordinates": [0.0, 0.0], + "properties": {"name": user_data.data}, + }, + [], + ) + ] + } + except Exception: + return {"processed_data": user_data} + + +def callback_builder(**kw: Any): + return print_args diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml new file mode 100644 index 00000000..6b76cb39 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster.yaml @@ -0,0 +1,9 @@ +name: raster +inputs: + user_data: Raster +output: + processed_data: Raster +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml new file mode 100644 index 00000000..a3ba7dff --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/raster_list.yaml @@ -0,0 +1,9 @@ +name: raster_list +inputs: + user_data: List[Raster] +output: + processed_data: List[Raster] +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml 
b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml new file mode 100644 index 00000000..01b743ec --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/simple_parameter.yaml @@ -0,0 +1,11 @@ +name: simple_parameter +inputs: + user_data: DataVibe +output: + processed_data: DataVibe +parameters: + overwrite: kept + keep: kept +entrypoint: + file: vibe_op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml new file mode 100644 index 00000000..4b8fbb2d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/str_list.yaml @@ -0,0 +1,13 @@ +name: str_list +inputs: + user_data: FakeType +output: + processed_data: List[DataVibe] +parameters: + fake_param: 1 + fake_another_param: + fake_nested: 2 + fake_nested_too: 3 +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml new file mode 100644 index 00000000..49c779de --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/timeseries.yaml @@ -0,0 +1,9 @@ +name: timeseries +inputs: + user_data: TimeSeries +output: + processed_data: TimeSeries +parameters: +entrypoint: + file: op.py + callback_builder: callback_builder \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py new file mode 100644 index 00000000..1825a6be --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py @@ -0,0 +1,11 @@ +from typing import Any, List + +from vibe_core.data import DataVibe + + +def callback(user_data: List[DataVibe]): + return {"processed_data": DataVibe.clone_from(user_data[0], id=user_data[0].id, assets=[])} + + +def callback_builder(**kw: Any): + return callback 
diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py new file mode 100644 index 00000000..f9a30042 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py @@ -0,0 +1,17 @@ +from typing import Any + +from vibe_core.data import DataVibe + + +def callback_builder(**kw: Any): + num_items = kw.get("num_items", 1) + + def callback(user_data: DataVibe): + return { + "processed_data": [ + DataVibe.clone_from(user_data, id=f"{user_data.id}_{i}", assets=[]) + for i in range(num_items) + ] + } + + return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py new file mode 100644 index 00000000..326ccefc --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py @@ -0,0 +1,13 @@ +from typing import Any, List, Union + +from vibe_core.data import DataVibe + + +def callback(user_data: Union[DataVibe, List[DataVibe]]): + if isinstance(user_data, list): + return {"processed_data": [DataVibe.clone_from(d, id=d.id, assets=[]) for d in user_data]} + return {"processed_data": DataVibe.clone_from(user_data, id=user_data.id, assets=[])} + + +def callback_builder(**kw: Any): + return callback diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml new file mode 100644 index 00000000..b011e2a5 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_sink.yaml @@ -0,0 +1,11 @@ +name: bad_sink +tasks: + task: + op: item_item + op_dir: fake +edges: +sources: + input: + - task.user_data +sinks: + output: task.bad_sink \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml new file mode 100644 index 00000000..fe52a855 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/bad_source.yaml @@ -0,0 
+1,11 @@ +name: bad_source +tasks: + task: + op: item_item + op_dir: fake +edges: +sources: + input: + - task.bad_source +sinks: + output: task.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml new file mode 100644 index 00000000..e6e0101f --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/base_base.yaml @@ -0,0 +1,16 @@ +name: base_base +tasks: + task1: + op: base_base + op_dir: fake + task2: + op: base_base + op_dir: fake +edges: + - origin: task1.processed_data + destination: [task2.user_data] +sources: + input: + - task1.user_data +sinks: + output: task2.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml new file mode 100644 index 00000000..5a436f2a --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/custom_indices_structure.yaml @@ -0,0 +1,32 @@ +name: fake_custom_indices_structure +sources: + user_input: + - s2.input +sinks: + s2: s2.nested2 + ndvi: ndvi.output + evi: evi.output + ndmi: ndmi.output + ndre: ndre.output + reci: reci.output +tasks: + s2: + workflow: workflow_inception + ndvi: + workflow: list_list + evi: + workflow: list_list + ndmi: + workflow: list_list + ndre: + workflow: list_list + reci: + workflow: list_list +edges: + - origin: s2.nested2 + destination: + - ndvi.input + - evi.input + - ndmi.input + - ndre.input + - reci.input diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml new file mode 100644 index 00000000..e121b0e2 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/fan_out_and_in.yaml @@ -0,0 +1,31 @@ +name: fan_out_and_in +tasks: + to_list: + op: item_list + op_dir: fake + scatter: + op: item_list + op_dir: fake + parallel: + op: list_item + 
op_dir: fake + gather: + op: list_list + op_dir: fake +edges: + - origin: to_list.processed_data + destination: + - scatter.user_data + - origin: scatter.processed_data + destination: + - parallel.user_data + - origin: parallel.processed_data + destination: + - gather.user_data +sources: + input: + - to_list.user_data +sinks: + scatter: scatter.processed_data + parallel: parallel.processed_data + gather: gather.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml new file mode 100644 index 00000000..c21dde82 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel.yaml @@ -0,0 +1,36 @@ +name: gather_and_parallel +tasks: + list: + op: list_list + op_dir: fake + scatter: + op: item_item + op_dir: fake + parallel: + op: item_item + op_dir: fake + two_types: + op: list_and_item_inputs + op_dir: fake + still_parallel: + op: item_item + op_dir: fake +edges: + - origin: list.processed_data + destination: + - scatter.user_data + - origin: scatter.processed_data + destination: + - parallel.user_data + - two_types.list_data # gather edge + - origin: parallel.processed_data + destination: + - two_types.item_data # parallel edge + - origin: two_types.processed_data + destination: + - still_parallel.user_data +sources: + input: + - list.user_data +sinks: + still_parallel: still_parallel.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml new file mode 100644 index 00000000..19129e57 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/gather_and_parallel_input_gather_output.yaml @@ -0,0 +1,36 @@ +name: gather_and_parallel_input_gather_output +tasks: + list: + op: list_list + op_dir: fake + scatter: + op: item_item + op_dir: fake + parallel: + op: item_item + op_dir: 
fake + two_types: + op: list_and_item_inputs + op_dir: fake + gather: + op: list_list + op_dir: fake +edges: + - origin: list.processed_data + destination: + - scatter.user_data + - origin: scatter.processed_data + destination: + - parallel.user_data + - two_types.list_data # gather edge + - origin: parallel.processed_data + destination: + - two_types.item_data # parallel edge + - origin: two_types.processed_data + destination: + - gather.user_data +sources: + inputs: + - list.user_data +sinks: + gather: gather.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml new file mode 100644 index 00000000..0661b4cd --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/incompatible_source.yaml @@ -0,0 +1,15 @@ +name: incompatible_source +tasks: + raster: + op: raster + op_dir: fake + timeseries: + op: timeseries + op_dir: fake +sources: + input: + - raster.user_data + - timeseries.user_data +sinks: + raster: raster.processed_data + datavibe: timeseries.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml new file mode 100644 index 00000000..3cceb1df --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance.yaml @@ -0,0 +1,28 @@ +name: inheritance +tasks: + list: + op: item_list + op_dir: fake + inherit_list: + op: list_inheritance + op_dir: fake + item: + op: list_item + op_dir: fake + inherit_item: + op: item_inheritance + op_dir: fake +edges: + - origin: list.processed_data + destination: + - inherit_list.user_data + - origin: item.processed_data + destination: + - inherit_item.user_data +sources: + input: + - list.user_data + - item.user_data +sinks: + list: inherit_list.processed_data + item: inherit_item.processed_data diff --git 
a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml new file mode 100644 index 00000000..4a5501ec --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_after_fan_out.yaml @@ -0,0 +1,23 @@ +name: inheritance_after_fan_out +tasks: + list: + op: list_list + op_dir: fake + scatter_inherit: + op: item_inheritance + op_dir: fake + item: + op: item_item + op_dir: fake +edges: + - origin: list.processed_data + destination: + - scatter_inherit.user_data + - origin: scatter_inherit.processed_data + destination: + - item.user_data +sources: + input: + - list.user_data +sinks: + output: item.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml new file mode 100644 index 00000000..5305230f --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_before_fan_out.yaml @@ -0,0 +1,23 @@ +name: inheritance_before_fan_out +tasks: + list: + op: list_list + op_dir: fake + inherit_list: + op: list_inheritance + op_dir: fake + scatter: + op: item_item + op_dir: fake +edges: + - origin: list.processed_data + destination: + - inherit_list.user_data + - origin: inherit_list.processed_data + destination: + - scatter.user_data +sources: + input: + - list.user_data +sinks: + scatter: scatter.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml new file mode 100644 index 00000000..c2ffcc50 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/inheritance_from_source.yaml @@ -0,0 +1,22 @@ +name: inheritance_from_source +tasks: + raster: + op: raster + op_dir: fake + inherit_raster: + op: item_inheritance + op_dir: fake + inherit_source: + op: item_inheritance + op_dir: fake +edges: + - 
origin: raster.processed_data + destination: + - inherit_raster.user_data +sources: + input: + - raster.user_data + - inherit_source.user_data +sinks: + raster: inherit_raster.processed_data + source: inherit_source.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml new file mode 100644 index 00000000..0353505d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_gather.yaml @@ -0,0 +1,17 @@ +name: item_gather +tasks: + item: + op: item_item + op_dir: fake + list: + op: list_list + op_dir: fake +edges: + - origin: item.processed_data + destination: + - list.user_data +sources: + input: + - item.user_data +sinks: + output: list.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml new file mode 100644 index 00000000..a1d656cf --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/item_item.yaml @@ -0,0 +1,10 @@ +name: item_item +tasks: + task: + op: item_item + op_dir: fake +sources: + input: + - task.user_data +sinks: + output: task.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml new file mode 100644 index 00000000..9e217cc5 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/list_list.yaml @@ -0,0 +1,10 @@ +name: list_list +tasks: + task: + op: list_list + op_dir: fake +sources: + input: + - task.user_data +sinks: + output: task.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml new file mode 100644 index 00000000..d6a68b9d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_edge.yaml @@ -0,0 +1,21 @@ +name: missing_edge +tasks: + task1: + op: 
item_item + op_dir: fake + task2: + op: item_item + op_dir: fake + missing_input: + op: item_item + op_dir: fake +edges: + - origin: task1.processed_data + destination: + - task2.user_data +sources: + input: + - task1.user_data +sinks: + second: task2.processed_data + missing: missing_input.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml new file mode 100644 index 00000000..0e0da03a --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/missing_inheritance.yaml @@ -0,0 +1,17 @@ +name: missing_inheritance +tasks: + list: + op: item_list + op_dir: fake + bad_inherit: + op: missing_inheritance + op_dir: fake +edges: + - origin: list.processed_data + destination: + - bad_inherit.user_data +sources: + input: + - list.user_data +sinks: + bad: bad_inherit.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml new file mode 100644 index 00000000..81a4431b --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_fan_out.yaml @@ -0,0 +1,37 @@ +name: nested_fan_out +tasks: + to_list: + op: str_list + op_dir: fake + scatter: + op: item_list + op_dir: fake + nested_scatter: + op: item_list + op_dir: fake + parallel: + op: list_item + op_dir: fake + gather: + op: list_list + op_dir: fake +edges: + - origin: to_list.processed_data + destination: + - scatter.user_data + - origin: scatter.processed_data + destination: + - nested_scatter.user_data + - origin: nested_scatter.processed_data + - parallel.user_data + - origin: parallel.processed_data + destination: + - gather.user_data +sources: + input: + - to_list.user_data +sinks: + scatter: scatter.processed_data + nested_scatter: nested_scatter.processed_data + parallel: parallel.processed_data + gather: gather.processed_data diff --git 
a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml new file mode 100644 index 00000000..f6d484f4 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_task_params.yaml @@ -0,0 +1,14 @@ +name: nested_task_params +tasks: + parameterizable: + op: str_list + op_dir: fake + parameters: + fake_another_param: + fake_nested_too: 4 +edges: +sources: + input: + - parameterizable.user_data +sinks: + output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml new file mode 100644 index 00000000..6a07ed0d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/nested_workflow.yaml @@ -0,0 +1,22 @@ +name: nested_workflow +tasks: + str_list: + op: str_list + op_dir: fake + nested1: + workflow: list_list + nested2: + workflow: list_list +edges: + - origin: str_list.processed_data + destination: + - nested1.input + - origin: nested1.output + destination: + - nested2.input +sources: + input: + - str_list.user_data +sinks: + str: str_list.processed_data + nested2: nested2.output \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml new file mode 100644 index 00000000..e9be6dc9 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params.yaml @@ -0,0 +1,23 @@ +name: resolve_nested_params +parameters: + new: overwritten + new_nested: overwritten nested +tasks: + simple: + op: simple_parameter + op_dir: fake + parameters: + overwrite: "@from(new)" + nested: + workflow: resolve_params + parameters: + new: "@from(new)" + new_nested: "@from(new_nested)" +edges: +sources: + input: + - simple.user_data + - nested.input +sinks: + simple: simple.processed_data + nested: 
nested.nested \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml new file mode 100644 index 00000000..c3fc269d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_default.yaml @@ -0,0 +1,24 @@ +name: resolve_nested_params_default +parameters: + new: + nested_new: + nested_new_nested: +tasks: + simple: + op: simple_parameter + op_dir: fake + parameters: + overwrite: "@from(new)" + nested: + workflow: resolve_params + parameters: + new: "@from(nested_new)" + new_nested: "@from(nested_new_nested)" +edges: +sources: + input: + - simple.user_data + - nested.input +sinks: + simple: simple.processed_data + nested: nested.nested diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml new file mode 100644 index 00000000..3756e91c --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_nested_params_multiple_default.yaml @@ -0,0 +1,23 @@ +name: resolve_nested_params_multiple_default +parameters: + new: + new_nested: +tasks: + simple: + op: simple_parameter + op_dir: fake + parameters: + overwrite: "@from(new)" + nested: + workflow: resolve_params + parameters: + new: "@from(new)" + new_nested: "@from(new_nested)" +edges: +sources: + input: + - simple.user_data + - nested.input +sinks: + simple: simple.processed_data + nested: nested.nested diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml new file mode 100644 index 00000000..7d1a5cd6 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/resolve_params.yaml @@ -0,0 +1,25 @@ +name: resolve_params +parameters: + new: overwritten + new_nested: overwritten nested +tasks: + simple: + op: 
simple_parameter + op_dir: fake + parameters: + overwrite: "@from(new)" + nested: + op: nested_parameters + op_dir: fake + parameters: + overwrite: "@from(new)" + nested: + overwrite: "@from(new_nested)" +edges: +sources: + input: + - simple.user_data + - nested.user_data +sinks: + simple: simple.processed_data + nested: nested.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml new file mode 100644 index 00000000..7998ce2a --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/single_and_parallel.yaml @@ -0,0 +1,36 @@ +name: single_and_parallel +tasks: + list: + op: list_list + op_dir: fake + scatter: + op: item_item + op_dir: fake + parallel: + op: item_item + op_dir: fake + two_types: + op: list_and_item_inputs + op_dir: fake + still_parallel: + op: item_item + op_dir: fake +edges: + - origin: list.processed_data + destination: + - scatter.user_data + - two_types.list_data # single edge + - origin: scatter.processed_data + destination: + - parallel.user_data + - origin: parallel.processed_data + destination: + - two_types.item_data # parallel edge + - origin: two_types.processed_data + destination: + - still_parallel.user_data +sources: + input: + - list.user_data +sinks: + still_parallel: still_parallel.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml new file mode 100644 index 00000000..4b78c1c7 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/source_and_destination.yaml @@ -0,0 +1,19 @@ +# Invalid workflow where an input port is defined as source and destination of an edge +name: source_and_destination +sources: + input: + - task1.user_data + - task2.user_data +sinks: + output: task2.processed_data +tasks: + task1: + op: list_list + op_dir: fake + task2: + op: list_list + op_dir: fake +edges: 
+ - origin: task1.processed_data + destination: + - task2.user_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml new file mode 100644 index 00000000..5e811a82 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source.yaml @@ -0,0 +1,15 @@ +name: specific_source +tasks: + raster: + op: raster + op_dir: fake + datavibe: + op: item_item + op_dir: fake +sources: + input: + - raster.user_data + - datavibe.user_data +sinks: + raster: raster.processed_data + datavibe: datavibe.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml new file mode 100644 index 00000000..2764bb31 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_item_list.yaml @@ -0,0 +1,15 @@ +name: specific_source_item_list +tasks: + raster: + op: raster + op_dir: fake + datavibe: + op: list_list + op_dir: fake +sources: + input: + - raster.user_data + - datavibe.user_data +sinks: + raster: raster.processed_data + datavibe: datavibe.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml new file mode 100644 index 00000000..018d8cb8 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/specific_source_list_list.yaml @@ -0,0 +1,15 @@ +name: specific_source_list_list +tasks: + raster: + op: raster_list + op_dir: fake + datavibe: + op: list_list + op_dir: fake +sources: + input: + - raster.user_data + - datavibe.user_data +sinks: + raster: raster.processed_data + datavibe: datavibe.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml 
b/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml new file mode 100644 index 00000000..a1c294a2 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/str_input.yaml @@ -0,0 +1,11 @@ +name: str_input +tasks: + str: + op: str_list + op_dir: fake +edges: +sources: + input: + - str.user_data +sinks: + output: str.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml new file mode 100644 index 00000000..39dae4c7 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/task_params.yaml @@ -0,0 +1,13 @@ +name: task_params +tasks: + parameterizable: + op: str_list + op_dir: fake + parameters: + fake_param: 3 +edges: +sources: + input: + - parameterizable.user_data +sinks: + output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml new file mode 100644 index 00000000..51267057 --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/three_ops.yaml @@ -0,0 +1,21 @@ +name: three_ops +tasks: + first: + op: item_item + op_dir: fake + second: + op: item_item + op_dir: fake + third: + op: item_item + op_dir: fake +edges: + - origin: first.processed_data + destination: second.user_data + - origin: second.processed_data + destination: third.user_data +sources: + input: + - first.user_data +sinks: + output: third.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml new file mode 100644 index 00000000..205ca0ae --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/two_level_inheritance.yaml @@ -0,0 +1,24 @@ +name: two_level_inheritance +tasks: + ancestor: + op: item_item + op_dir: fake + direct_inherit: + op: 
item_inheritance + op_dir: fake + indirect_inherit: + op: item_inheritance + op_dir: fake +edges: + - origin: ancestor.processed_data + destination: + - direct_inherit.user_data + - origin: direct_inherit.processed_data + destination: + - indirect_inherit.user_data +sources: + input: + - ancestor.user_data +sinks: + direct: direct_inherit.processed_data + indirect: indirect_inherit.processed_data diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml new file mode 100644 index 00000000..67ca94dd --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/unknown_task_params.yaml @@ -0,0 +1,13 @@ +name: unknown_task_params +tasks: + parameterizable: + op: str_list + op_dir: fake + parameters: + new_param: foo +edges: +sources: + input: + - parameterizable.user_data +sinks: + output: parameterizable.processed_data \ No newline at end of file diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml b/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml new file mode 100644 index 00000000..f0d3ac2d --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows/workflow_inception.yaml @@ -0,0 +1,15 @@ +name: workflow_inception +tasks: + nested: + workflow: nested_workflow + nested2: + workflow: list_list +edges: + - origin: nested.str + destination: + - nested2.input +sources: + input: + - nested.input +sinks: + nested2: nested2.output diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py b/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py new file mode 100644 index 00000000..3fa3ddcc --- /dev/null +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py @@ -0,0 +1,35 @@ +import os +from dataclasses import dataclass + +import pytest + +from vibe_core.data.core_types import BaseVibe + +HERE = os.path.dirname(os.path.abspath(__file__)) +WORKFLOWS_DIR = os.path.join(HERE, "fake_workflows") +OPS_DIR 
def get_fake_workflow_path(workflow_name: str):
    """Return the path of the fake workflow definition called ``workflow_name``.

    The path is built as ``<WORKFLOWS_DIR>/<workflow_name>.yaml``; no check is
    made that the file actually exists.
    """
    yaml_filename = f"{workflow_name}.yaml"
    return os.path.join(WORKFLOWS_DIR, yaml_filename)
class OpTester:
    """Build and run a single op against a fake storage and a temporary asset store.

    The op spec is parsed once from ``path_to_config``. The op itself is rebuilt
    on every :meth:`run` call from the current spec, so parameters changed
    through :meth:`update_parameters` apply to the next run.
    """

    def __init__(self, path_to_config: str):
        # Assets are kept in a temporary directory that is cleaned up in __del__.
        self._tmp_dir = TemporaryDirectory()
        self.asset_manager = LocalFileAssetManager(self._tmp_dir.name)
        self.fake_storage = FakeStorage(self.asset_manager)
        self.spec = OperationParser.parse(path_to_config)

    def run(self, **input_dict: Union[BaseVibe, List[BaseVibe]]) -> BaseVibeDict:
        """Build the op from the current spec and call its callback with ``input_dict``."""
        self.op = OperationFactory(
            self.fake_storage, AzureSecretProvider(credential=AzureCliCredential())
        ).build(self.spec)
        return self.op.callback(**input_dict)

    def update_parameters(self, parameters: Dict[str, Any]):
        """Merge ``parameters`` into the parameters of the parsed op spec."""
        self.spec.parameters.update(parameters)

    def __del__(self):
        # The finalizer also runs for instances whose __init__ failed before
        # `_tmp_dir` was assigned; in that case there is nothing to clean up, and
        # touching `self._tmp_dir` again would raise from inside the finalizer.
        tmp_dir = getattr(self, "_tmp_dir", None)
        if tmp_dir is None:
            return
        try:
            tmp_dir.cleanup()
        except (AttributeError, FileNotFoundError):
            # Best-effort cleanup: only report the failure.
            LOGGER.info(f"Unable to clean temporary directory {tmp_dir}")
stac_outputs: ItemDict): + ref = self._get_reference() + serialized = [ + {k: serialize_stac(v) for k, v in s.items()} for s in (stac_inputs, stac_outputs) + ] + ref.append(serialized) + with open(self._get_ref_path(), "w") as f: + json.dump(ref, f, cls=DataclassJSONEncoder) + + def save_items(self, items: ItemDict) -> ItemDict: + save_items = deepcopy(items) + for item_list in save_items.values(): + item_list = ensure_list(item_list) + for item in item_list: + for k, v in item.assets.items(): + rel_path = os.path.join(self.name, k) + abs_path = os.path.join(self.root_dir, rel_path) + filepath = self.storage.asset_manager.retrieve(k) + try: + os.makedirs(abs_path) + shutil.copy(filepath, abs_path) + except FileExistsError: + # File exists so we don't copy again + pass + v.href = os.path.join(rel_path, os.path.basename(filepath)) + + return save_items + + def save_inputs_and_outputs(self, input_items: ItemDict, output_items: ItemDict): + save_inputs = self.save_items(input_items) + save_outputs = self.save_items(output_items) + self._update_reference(save_inputs, save_outputs) + + def run(self, input_items: OpIOType) -> OpIOType: + stac_inputs = OpIOConverter.deserialize_input(input_items) + cache_info = CacheInfo(self.name, self.version, stac_inputs, self.dependencies) + items_out = super().run(input_items, cache_info) + stac_outputs = {k: deserialize_stac(v) for k, v in items_out.items()} + # Create directory for the op if necessary + os.makedirs(os.path.join(self.root_dir, self.name), exist_ok=True) + self.save_inputs_and_outputs(stac_inputs, stac_outputs) + return items_out + + +class ReferenceSaverFactory(OperationFactory): + storage: Storage + save_dir: str + + def __init__(self, storage: Storage, secret_provider: SecretProvider, save_dir: str): + super().__init__(storage, secret_provider) + self.save_dir = save_dir + + def _build_impl(self, op_config: OperationSpec) -> ReferenceSaver: + parameters = self.resolve_secrets(op_config.parameters) + dependencies = 
class ReferenceRetriever:
    """Read back the reference inputs/outputs stored for an op under ``root_dir``."""

    def __init__(self, root_dir: str):
        self.root_dir = root_dir
        self.converter = data.StacConverter()

    def retrieve_assets(self, items: Union[List[Item], Item]):
        """Rewrite, in place, every asset href so that it is prefixed by ``root_dir``."""
        stac_item: Item
        for stac_item in ensure_list(items):
            for stored_asset in stac_item.assets.values():
                stored_asset.href = os.path.join(self.root_dir, stored_asset.href)

    def retrieve_op_io(self, item_dict: OpIOType) -> ItemDict:
        """Deserialize each entry of ``item_dict`` and fix up its asset hrefs."""
        deserialized = {}
        for port_name, serialized in item_dict.items():
            deserialized[port_name] = deserialize_stac(serialized)
        for port_items in deserialized.values():
            self.retrieve_assets(port_items)
        return deserialized

    def to_terravibes(self, item_dict: ItemDict) -> BaseVibeDict:
        """Convert every value of ``item_dict`` with the converter's ``from_stac_item``."""
        converted = {}
        for port_name, stac_value in item_dict.items():
            converted[port_name] = self.converter.from_stac_item(stac_value)
        return converted

    def retrieve(self, op_name: str) -> List[List[BaseVibeDict]]:
        """Load the reference file of ``op_name`` and return its converted entries.

        Each element of the loaded JSON list is itself a list of serialized
        item dictionaries; the nesting is preserved in the returned value.
        """
        reference_path = os.path.join(self.root_dir, op_name, REFERENCE_FILENAME)
        with open(reference_path) as f:
            raw_pairs = json.load(f)
        stac_pairs = []
        for raw_pair in raw_pairs:
            stac_pairs.append([self.retrieve_op_io(op_io) for op_io in raw_pair])
        output_pairs = []
        for stac_pair in stac_pairs:
            output_pairs.append([self.to_terravibes(op_io) for op_io in stac_pair])
        return output_pairs
+from hydra_zen import instantiate + +from vibe_agent.storage import ( + BlobAssetManagerConfig, + CosmosStorage, + CosmosStorageConfig, + LocalFileAssetManagerConfig, + LocalStorageConfig, +) +from vibe_common.secret_provider import KeyVaultSecretConfig + +TEST_STORAGE = "https://eywadevtest.blob.core.windows.net" +REMOTE_FILE_CONTAINER = "testdata" +DUMMY_COSMOS_URI = "https://terravibes-db.documents.azure.com:443/" + + +@pytest.fixture(autouse=True, scope="session") +def stac_container() -> str: + stac_container_name: str = "stac" + str(uuid.uuid4())[0:6] + return stac_container_name + + +@pytest.fixture(autouse=True, scope="session") +def asset_container() -> str: + asset_name: str = "asset" + str(uuid.uuid4())[0:6] + return asset_name + + +@pytest.fixture(scope="session") +def storage_spec( + request: pytest.FixtureRequest, + tmp_path_factory: pytest.TempPathFactory, + stac_container: str, + asset_container: str, +): + storage_type: str = request.param # type: ignore + if storage_type == "local": + tmp_path = tmp_path_factory.mktemp("testdir") + tmp_asset_path = os.path.join(tmp_path, "assets") + yield LocalStorageConfig( + local_path=tmp_path, asset_manager=LocalFileAssetManagerConfig(tmp_asset_path) + ) + elif storage_type == "remote": + cosmos_asset_container = asset_container + "cosmos" + key_config = KeyVaultSecretConfig( + "https://eywa-secrets.vault.azure.net/", "stac-cosmos-write-key", AzureCliCredential() + ) + key = instantiate(key_config) + test_db_name = "test-db" + config = CosmosStorageConfig( + key=key, + asset_manager=BlobAssetManagerConfig( + storage_account_url=TEST_STORAGE, + storage_account_connection_string="", + asset_container_name=cosmos_asset_container, + credential=AzureCliCredential(), + ), + cosmos_database_name=test_db_name, + stac_container_name=stac_container, + cosmos_url=DUMMY_COSMOS_URI, + ) + cast(CosmosStorage, instantiate(config)).container_proxy + yield config + cred = AzureCliCredential() + client = 
class WorkflowTestHelper:
    """Static helpers to build a local runner for a workflow and check its outputs."""

    @staticmethod
    def get_groundtruth_for_workflow(workflow_path: str) -> List[str]:
        """Return the sink names declared in the workflow YAML at ``workflow_path``."""
        with open(workflow_path) as yaml_file:
            workflow_def = yaml.safe_load(yaml_file)

        # `sinks` maps sink name -> task output; only the names are the expected
        # output keys, so return them as the list the signature promises.
        return list(workflow_def["sinks"])

    @staticmethod
    def verify_workflow_result(workflow_path: str, result: OpIOType):
        """Assert that ``result`` has exactly the workflow's sinks as keys.

        Every value must be a ``dict`` or a ``list`` and the result must not be
        empty. Raises ``AssertionError`` otherwise.
        """
        case = TestCase()
        expected_output_names = WorkflowTestHelper.get_groundtruth_for_workflow(workflow_path)

        assert len(expected_output_names) == len(result.keys())
        case.assertCountEqual(result.keys(), expected_output_names)
        for value in result.values():
            assert isinstance(value, (dict, list))
        assert len(result) > 0

    @staticmethod
    def gen_workflow(
        workflow_path: str,
        storage_spec: StorageConfig,
    ) -> WorkflowRunner:
        """Build a ``LocalWorkflowRunner`` for the workflow at ``workflow_path``."""
        factory_spec = OperationFactoryConfig(
            storage_spec, AzureSecretProviderConfig(credential=AzureCliCredential())
        )
        workflow = Workflow.build(workflow_path)
        io_mapper = WorkflowIOHandler(workflow)
        runner = LocalWorkflowRunner.build(
            io_mapper=io_mapper,
            factory_spec=factory_spec,
            workflow=workflow,
        )
        # Force the inner runner to treat every task as a plain op.
        runner.runner.is_workflow = lambda *args, **kwargs: False  # type: ignore

        return runner
@pytest.fixture
def simple_op_spec(SimpleStrData: Type[SimpleStrDataType], tmp_path: Path) -> OperationSpec:
    """Write a fake op module into ``tmp_path`` and return an ``OperationSpec`` for it.

    The generated ``fake.py`` exposes ``fake_callback``, whose callback returns
    a ``DataVibe`` under ``processed_data`` and a ``SimpleStrData`` under
    ``simple_str``.
    """
    # The module source contains non-ASCII literals. Python reads source files
    # as UTF-8 by default, so write it as UTF-8 explicitly rather than with the
    # locale-dependent default encoding of `open`.
    with open(tmp_path / "fake.py", "w", encoding="utf-8") as fp:
        fp.write(
            """
from datetime import datetime
from vibe_core.data import DataVibe
from vibe_dev.testing.workflow_fixtures import SimpleStrDataType as SimpleStrData
def fake_callback(*args, **kwargs):
    def callback(**kwargs):
        out = {
            "processed_data": DataVibe(
                id="🍔",
                time_range=(datetime.now(), datetime.now()),
                geometry={
                    "type": "Point",
                    "coordinates": [0.0, 0.0],
                    "properties": {
                        "name": "🤭"
                    }
                },
                assets=[]
            ),
            "simple_str": SimpleStrData("🍔")
        }
        return out
    return callback

        """
        )
    return OperationSpec(
        name="fake",
        inputs_spec=TypeDictVibe(
            {  # type: ignore
                "plain_input": SimpleStrData,
                "list_input": List[SimpleStrData],
                "terravibes_input": DataVibe,
                "terravibes_list": List[DataVibe],
            }
        ),
        output_spec=TypeDictVibe({"processed_data": DataVibe, "simple_str": SimpleStrData}),
        parameters={},
        entrypoint=EntryPointDict(
            {"file": "fake.py", "callback_builder": "fake_callback"}  # type: ignore
        ),
        root_folder=str(tmp_path),
        description=TaskDescription(),
    )
"FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-88.068487, 37.058836], + [-88.036059, 37.048687], + [-88.012895, 37.068984], + [-88.026622, 37.085711], + [-88.062482, 37.081461], + [-88.068487, 37.058836], + ] + ], + }, + } + ], + }, + }, + "workflow": "helloworld", + "parameters": None, + } + + +COORDS = (-55, -6) +TIME_RANGE = (datetime.now(), datetime.now()) +THE_DATAVIBE = DataVibe( + id="1", + time_range=TIME_RANGE, + geometry=shpg.mapping(shpg.Point(*COORDS).buffer(0.05, cap_style=3)), + assets=[], +) diff --git a/src/vibe_lib/setup.py b/src/vibe_lib/setup.py new file mode 100644 index 00000000..154d8ad3 --- /dev/null +++ b/src/vibe_lib/setup.py @@ -0,0 +1,14 @@ +from setuptools import find_packages, setup + +setup( + name="vibe_lib", + version="0.0.1", + author="Microsoft", + author_email="terravibes@microsoft.com", + description="TerraVibes Geospatial Platform Package - vibe lib.", + license="Proprietary", + keywords="terravibes geospatial", + packages=find_packages(exclude=["tests*"]), + python_requires="~=3.8", + install_requires=["numpy", "geopandas", "rasterio~=1.2"], +) diff --git a/src/vibe_lib/tests/test_airbus_api.py b/src/vibe_lib/tests/test_airbus_api.py new file mode 100644 index 00000000..50cc531c --- /dev/null +++ b/src/vibe_lib/tests/test_airbus_api.py @@ -0,0 +1,52 @@ +from typing import Dict +from unittest.mock import Mock, patch + +import pytest + +from vibe_lib.airbus import AirBusAPI, Constellation + + +@pytest.fixture(scope="module") +def api(): + with patch("vibe_lib.airbus.AirBusAPI._get_api_key") as mock_key: + with patch("vibe_lib.airbus.AirBusAPI._authenticate") as mock_token: + mock_key.return_value = "mock_api_key" + mock_token.return_value = "mock_token" + yield AirBusAPI("mock_filepath", False, [Constellation.PHR], 0.1, 0.4) + + +@pytest.fixture +def ordered_status(): + return {"id": "0", "status": "ordered"} + + +@pytest.fixture +def 
delivered_status(): + return {"id": "0", "status": "delivered"} + + +@pytest.fixture +def unkown_status(): + return {"id": "0", "status": "unknown"} + + +@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") +def test_ok_order(mock_handle: Mock, api: AirBusAPI, delivered_status: Dict[str, str]): + mock_handle.return_value = delivered_status + api.block_until_order_delivered("order_id") + mock_handle.assert_called_once_with("order_id") + + +@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") +def test_unexpected_order_status(mock_handle: Mock, api: AirBusAPI, unkown_status: Dict[str, str]): + mock_handle.return_value = unkown_status + with pytest.raises(ValueError): + api.block_until_order_delivered("order_id") + + +@patch("vibe_lib.airbus.AirBusAPI.get_order_by_id") +def test_timeout(mock_handle: Mock, api: AirBusAPI, ordered_status: Dict[str, str]): + mock_handle.return_value = ordered_status + with pytest.raises(RuntimeError): + api.block_until_order_delivered("order_id") + assert mock_handle.call_count == 5 diff --git a/src/vibe_lib/tests/test_earthdata.py b/src/vibe_lib/tests/test_earthdata.py new file mode 100644 index 00000000..c3aa3858 --- /dev/null +++ b/src/vibe_lib/tests/test_earthdata.py @@ -0,0 +1,113 @@ +import math +from datetime import datetime +from typing import Any, Optional, Tuple, cast +from unittest.mock import Mock, patch + +import pytest +import requests +from shapely import geometry as shpg + +from vibe_core.data.core_types import BBox +from vibe_lib.earthdata import EarthDataAPI, format_geometry + +FMT_BOX = "2.0,1.0,2.0,3.0,0.0,3.0,0.0,1.0,2.0,1.0" +PROCESSING_LEVEL = "GEDI02_B.002" + + +@pytest.fixture +def test_box(): + return shpg.box(0, 1, 2, 3) + + +def fake_responses(num_items: int, page_size: int): + def foo(*args: Any, **kwargs: Any): + nonlocal num_items + num_return = min(num_items, page_size) + num_items = num_items - num_return + return {"feed": {"entry": [None for _ in range(num_return)]}} + + return foo + + +def 
test_format_geometry(test_box: shpg.Polygon): + fmt_geoms = format_geometry(test_box) + assert len(fmt_geoms) == 1 + assert fmt_geoms[0] == FMT_BOX + + +def test_format_cw_geometry(test_box: shpg.Polygon): + # Make sure we orient geometry properly (counter-clockwise) + test_geom = shpg.polygon.orient(test_box, sign=-1) + fmt_cw = format_geometry(test_geom)[0] + assert fmt_cw == FMT_BOX + + +def test_format_multipoly(test_box: shpg.Polygon): + test_geom = cast(shpg.MultiPolygon, test_box.union(shpg.box(10, 10, 11, 11))) + fmt_geoms = format_geometry(test_geom) + assert len(fmt_geoms) == 2 + assert fmt_geoms[0] == FMT_BOX + + +def test_api_wrapper_base_payload(): + api = EarthDataAPI(PROCESSING_LEVEL) + payload = api._get_payload(geometry=None, bbox=None, time_range=None, id=None) + assert len(payload) == 3 + assert payload["provider"] == api.provider + assert payload["concept_id"] == api.concept_ids[PROCESSING_LEVEL] + assert payload["page_size"] == api.page_size + + +@pytest.mark.parametrize("id", (None, "test_id")) +@pytest.mark.parametrize("time_range", (None, (datetime.now(), datetime.now()))) +@pytest.mark.parametrize("bbox", (None, (0, 0, 1, 1))) +@pytest.mark.parametrize("geometry", (None, shpg.box(0, 0, 1, 1))) +def test_api_wrapper_payload_keys( + geometry: Optional[shpg.Polygon], + bbox: Optional[BBox], + time_range: Optional[Tuple[datetime, datetime]], + id: Optional[str], +): + api = EarthDataAPI(PROCESSING_LEVEL) + payload = api._get_payload(geometry=geometry, bbox=bbox, time_range=time_range, id=id) + if geometry is not None: + assert "polygon[]" in payload + assert "options[polygon][or]" in payload + if bbox is not None: + assert "bounding_box" in payload + if time_range is not None: + assert "temporal" in payload + if id is not None: + assert "producer_granule_id" in payload + + +@pytest.mark.parametrize("num_items", (1, 2000, 2001, 9000)) +@patch.object(requests, "post") +def test_api_wrapper_paging(post: Mock, num_items: int): + api = 
EarthDataAPI(PROCESSING_LEVEL) + response_mock = Mock() + response_mock.configure_mock(**{"json.side_effect": fake_responses(num_items, api.page_size)}) + post.return_value = response_mock + api.query() + expected_calls = math.ceil((num_items + 1) / api.page_size) + assert post.call_count == expected_calls + for i, call_args in enumerate(post.call_args_list, 1): + assert call_args[1]["data"]["pageNum"] == i + + +@patch.object(requests, "post") +def test_api_wrapper_max_pages(post: Mock): + api = EarthDataAPI(PROCESSING_LEVEL) + response_mock = Mock() + response_mock.configure_mock( + **{"json.side_effect": fake_responses(api.max_items, api.page_size)} + ) + post.return_value = response_mock + api.query() + + response_mock.configure_mock( + **{"json.side_effect": fake_responses(api.max_items + api.page_size, api.page_size)} + ) + + with pytest.raises(RuntimeError): + api.query() diff --git a/src/vibe_lib/tests/test_predict_chips.py b/src/vibe_lib/tests/test_predict_chips.py new file mode 100644 index 00000000..407e4b32 --- /dev/null +++ b/src/vibe_lib/tests/test_predict_chips.py @@ -0,0 +1,91 @@ +from datetime import datetime +from pathlib import Path +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest +import rasterio +from rasterio.windows import Window +from shapely import geometry as shpg + +from vibe_core.data import AssetVibe, Raster +from vibe_lib.spaceeye import chip + +RASTER_SIZE = 256 +RASTER_BANDS = 2 + + +class MockDataset: + spatial_size: int = 256 + channels: int = 3 + nodata: int = 100 + + def __init__(self, start_idx: int, length: int): + self.start_idx = start_idx + self.length = length + self.get_filename = None + self.zeros = np.zeros((MockDataset.channels, self.spatial_size, self.spatial_size)) + self.ones = np.ones((MockDataset.channels, self.spatial_size, self.spatial_size)) + + def __getitem__(self, idx: int): + if idx < self.start_idx: + return self.ones, self.zeros, {} + return self.zeros, self.ones, {} + + def 
__len__(self): + return self.length + + +@pytest.fixture() +def test_raster(tmp_path: Path): + geom = shpg.mapping(shpg.box(0, 0, 1, 1)) + now = datetime.now() + filepath = tmp_path / "test_raster.tif" + with rasterio.open( + filepath, + "w", + driver="GTiff", + width=RASTER_SIZE, + height=RASTER_SIZE, + count=RASTER_BANDS, + dtype="float32", + nodata=-1, + ) as dst: + dst.write(np.arange(RASTER_SIZE**2 * RASTER_BANDS).reshape(2, RASTER_SIZE, RASTER_SIZE)) + asset = AssetVibe(reference=str(filepath), type="image/tiff", id="asset_id") + return Raster(id="1", geometry=geom, time_range=(now, now), bands={}, assets=[asset]) + + +@pytest.mark.parametrize("start_idx, length", ((0, 5), (1, 5), (5, 5), (0, 100), (50, 100))) +@patch.object(chip, "write_prediction_to_file") +def test_skip_nodata(write_patch: MagicMock, start_idx: int, length: int): + dataset = MockDataset(start_idx, length) + loader = chip.get_loader(dataset, 1, num_workers=0) # type: ignore + model = MagicMock() + model.run.return_value = 10 * np.ones((1, 5, dataset.spatial_size, dataset.spatial_size)) + chip.predict_chips(model, loader, "anything", skip_nodata=True) + assert model.run.call_count == max(start_idx, 1) + + +@pytest.mark.filterwarnings("ignore: Dataset has no geotransform") +@pytest.mark.parametrize("downsampling", (1, 2, 8)) +def test_in_memory_reader(downsampling: int, test_raster: Raster): + out_shape = (16, 16) + reader = chip.InMemoryReader(downsampling) + reader._cache_raster = MagicMock(side_effect=reader._cache_raster) + for offset in (0, 0, 1, 2): + win = Window( + offset * downsampling, # type: ignore + 0, + *(o * downsampling for o in out_shape), + ) + x, m = reader(test_raster, win, out_shape=out_shape) + assert x.shape[1:] == out_shape + assert m.shape[1:] == out_shape + x, m = reader(test_raster, win, out_shape=out_shape) + reader._cache_raster.assert_called_once() + assert reader.rasters[test_raster.id]["data"].shape == ( + RASTER_BANDS, + RASTER_SIZE // downsampling, + 
RASTER_SIZE // downsampling, + ) diff --git a/src/vibe_lib/tests/test_raster_chipping.py b/src/vibe_lib/tests/test_raster_chipping.py new file mode 100644 index 00000000..420e2e7f --- /dev/null +++ b/src/vibe_lib/tests/test_raster_chipping.py @@ -0,0 +1,117 @@ +from datetime import datetime +from tempfile import TemporaryDirectory + +import numpy as np +import pytest +import xarray as xr +from shapely import geometry as shpg + +from vibe_core.data import Raster +from vibe_lib.raster import save_raster_to_asset +from vibe_lib.spaceeye.chip import ChipDataset +from vibe_lib.spaceeye.dataset import Dims, get_read_intervals, get_write_intervals + +RASTER_SIZE = 256 + + +@pytest.mark.parametrize("dim_size", [500, 10000]) +@pytest.mark.parametrize("chip_ratio", [1, 2, 10, 100]) +@pytest.mark.parametrize("step_ratio", [0.3, 0.5, 1.0]) +@pytest.mark.parametrize("offset", [0, 5000]) +def test_read_intervals(dim_size: int, chip_ratio: int, step_ratio: int, offset: int): + chip_size = dim_size // chip_ratio + step = int(step_ratio * chip_size) + read_start, read_end = get_read_intervals(dim_size, chip_size, step, offset) + assert np.all(read_end > read_start) + # No empty space in reads + assert np.all(read_start[1:] <= read_end[:-1]) + # All windows have the correct size + assert np.all((read_end - read_start) == chip_size) + # Don't make the step larger when adjusting + assert np.all((read_start[1:] - read_start[:-1]) <= step) + # Cover the whole thing + assert read_start[0] == offset + assert read_end[-1] == dim_size + offset + + +@pytest.mark.parametrize("dim_size", [500, 10000]) +@pytest.mark.parametrize("chip_ratio", [1, 2, 10, 100]) +@pytest.mark.parametrize("step_ratio", [0.3, 0.5, 1.0]) +@pytest.mark.parametrize("offset", [0, 5000]) +def test_write_intervals(dim_size: int, chip_ratio: int, step_ratio: int, offset: int): + chip_size = dim_size // chip_ratio + step = int(step_ratio * chip_size) + read_start, read_end = get_read_intervals(dim_size, chip_size, step, 
offset) + (write_start, write_end), (chip_start, chip_end) = get_write_intervals( + dim_size, chip_size, step, offset + ) + assert np.all(write_end > write_start) + # Chip and window sizes are the same + assert np.allclose(write_end - write_start, chip_end - chip_start) + # No empty space and no intersection in writes + assert np.all(write_start[1:] == write_end[:-1]) + # Don't try to write where we didn't read + assert np.all(write_start >= read_start) + assert np.all(write_end <= read_end) + # Cover the whole thing + assert write_start[0] == offset + assert write_end[-1] == dim_size + offset + + +def test_chip_size_too_large(): + dim_size = 447 + chip_size = 448 + step = 0 + offset = 0 + with pytest.raises(ValueError): + get_read_intervals(dim_size, chip_size, step, offset) + with pytest.raises(ValueError): + get_write_intervals(dim_size, chip_size, step, offset) + + +@pytest.fixture +def tmp_dir_name(): + _tmp_dir = TemporaryDirectory() + yield _tmp_dir.name + _tmp_dir.cleanup() + + +@pytest.fixture() +def test_raster(tmp_dir_name: str): + geom = shpg.mapping(shpg.box(0, 0, RASTER_SIZE, RASTER_SIZE)) + now = datetime.now() + raster_dim = (1, RASTER_SIZE, RASTER_SIZE) + + fake_data = np.zeros(raster_dim).astype(np.float32) + fake_da = xr.DataArray( + fake_data, + coords={ + "bands": np.arange(raster_dim[0]), + "x": np.linspace(0, 1, raster_dim[1]), + "y": np.linspace(0, 1, raster_dim[2]), + }, + dims=["bands", "y", "x"], + ) + fake_da.rio.write_crs("epsg:4326", inplace=True) + + asset = save_raster_to_asset(fake_da, tmp_dir_name) + return Raster(id="1", geometry=geom, time_range=(now, now), bands={}, assets=[asset]) + + +def test_window_smaller_than_chip(test_raster: Raster): + chip_size = RASTER_SIZE // 2 + + # window of size 0.25 * RASTER_SIZE, while chip is 0.5 * RASTER_SIZE + # RoI will need to be adjusted to match chip size + roi_geometry = shpg.Polygon(shpg.box(0.25, 0.25, 0.5, 0.5)) + + dataset = ChipDataset( + rasters=[test_raster], + 
chip_size=Dims(chip_size, chip_size, 1), + step_size=Dims(chip_size, chip_size, 1), + geometry_or_chunk=roi_geometry, + ) + + assert (dataset.raster_width, dataset.raster_height) == (RASTER_SIZE, RASTER_SIZE) + assert (dataset.width, dataset.height) == (chip_size, chip_size) + assert (dataset.roi_window.width, dataset.roi_window.height) == (chip_size, chip_size) diff --git a/src/vibe_lib/vibe_lib/__init__.py b/src/vibe_lib/vibe_lib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/vibe_lib/vibe_lib/airbus.py b/src/vibe_lib/vibe_lib/airbus.py new file mode 100644 index 00000000..e7002c3a --- /dev/null +++ b/src/vibe_lib/vibe_lib/airbus.py @@ -0,0 +1,234 @@ +import json +import os +import time +from datetime import datetime +from enum import auto +from typing import Any, Dict, List, Sequence, Tuple +from zipfile import ZipFile + +import requests +from fastapi_utils.enums import StrEnum +from shapely import geometry as shpg +from shapely.geometry.base import BaseGeometry + +from vibe_core.file_downloader import download_file + +from .geometry import wgs_to_utm + +DEFAULT_DELAY = 60 +DEFAULT_TIMEOUT = 1200 +IMAGE_FORMAT = "image/jp2" +LIVING_ATLAS_PROCESSING_LEVEL = "SENSOR" +PRODUCT_TYPE = "pansharpened" +RADIOMETRIC_PROCESSING = "DISPLAY" + + +class Constellation(StrEnum): + SPOT = auto() + PHR = auto() + PNEO = auto() + + +class GeometryRelation(StrEnum): + intersects = auto() + contains = auto() + + +class OrderStatus(StrEnum): + ordered = auto() + delivered = auto() + + +class AirBusAPI: + authentication_url: str = ( + "https://authenticate.foundation.api.oneatlas.airbus.com/" + "auth/realms/IDP/protocol/openid-connect/token" + ) + search_url: str = "https://search.foundation.api.oneatlas.airbus.com/api/v2/opensearch" + price_url: str = "https://data.api.oneatlas.airbus.com/api/v1/prices" + order_url: str = "https://data.api.oneatlas.airbus.com/api/v1/orders" + item_url: str = 
"https://access.foundation.api.oneatlas.airbus.com/api/v1/items" + + def __init__( + self, + api_key: str, + projected_crs: bool, + constellations: List[Constellation], + delay: float = DEFAULT_DELAY, + timeout: float = DEFAULT_TIMEOUT, + ): + self.api_key = api_key + self.token = self._authenticate() + self.projected_crs = projected_crs + self.constellations = constellations + self.delay = delay # in seconds + self.timeout = timeout + + @staticmethod + def _get_api_key(api_key_filepath: str) -> str: + with open(api_key_filepath) as f: + return f.read().strip() + + def _get(self, url: str, **kwargs: Any) -> Dict[str, Any]: + response = requests.get(url, **kwargs) + response.raise_for_status() + return json.loads(response.text) + + def _post(self, url: str, **kwargs: Any) -> Dict[str, Any]: + response = requests.post(url, **kwargs) + response.raise_for_status() + return json.loads(response.text) + + def _authenticate(self): + headers = {"Content-Type": "application/x-www-form-urlencoded"} + data = [ + ("apikey", self.api_key), + ("grant_type", "api_key"), + ("client_id", "IDP"), + ] + response = self._post(self.authentication_url, headers=headers, data=data) + return response["access_token"] + + def _get_workspace_id(self) -> str: + headers = {"Authorization": f"Bearer {self.token}", "Cache-Control": "no-cache"} + response = self._get("https://data.api.oneatlas.airbus.com/api/v1/me", headers=headers) + return response["contract"]["workspaceId"] + + def _search(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]: + headers = { + "Authorization": f"Bearer {self.token}", + "Cache-Control": "no-cache", + "Content-Type": "application/json", + } + + response = self._post(self.search_url, headers=headers, json=payload) + products = [{**r["properties"], "geometry": r["geometry"]} for r in response["features"]] + return products + + def query( + self, + geometry: BaseGeometry, + date_range: Tuple[datetime, datetime], + max_cloud_cover: int, + my_workspace: bool = False, + 
) -> List[Dict[str, Any]]: + """ + Only get results that contain all the geometry (instead of intersecting) + + constellations + PNEO 0.3m (Neo Pléiades) + SPOT 1.5m + PHR 0.5m (Pléiades) + + Cloud cover values used for filtering are for the whole product + irrespective of the given geometry 😢 + """ + + formatted_date = ",".join( + [dt.astimezone().isoformat().replace("+00:00", "Z") for dt in date_range] + ) + payload: Dict[str, str] = { + "geometry": shpg.mapping(geometry), + "acquisitionDate": f"[{formatted_date}]", + "constellation": ",".join(self.constellations), + "cloudCover": f"[0,{max_cloud_cover:d}]", + "relation": GeometryRelation.intersects if my_workspace else GeometryRelation.contains, + } + if my_workspace: + payload["workspace"] = self._get_workspace_id() + else: + payload["processingLevel"] = LIVING_ATLAS_PROCESSING_LEVEL + + return self._search(payload) + + def query_owned(self, geometry: BaseGeometry, acquisition_id: str) -> List[Dict[str, Any]]: + """ + Query workspace for owned products that match the reference product + """ + payload: Dict[str, str] = { + "acquisitionIdentifier": acquisition_id, + "geometry": shpg.mapping(geometry), + "relation": GeometryRelation.intersects, + "workspace": self._get_workspace_id(), + } + return self._search(payload) + + def get_product_by_id(self, product_id: str) -> Dict[str, Any]: + payload: Dict[str, str] = {"id": product_id} + return self._search(payload)[0] + + def _get_order_params( + self, product_ids: Sequence[str], roi: BaseGeometry + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + headers = { + "Authorization": f"Bearer {self.token}", + "Cache-Control": "no-cache", + "Content-Type": "application/json", + } + epsg_code = wgs_to_utm(roi) if self.projected_crs else "4326" + payload = { + "kind": "order.data.gb.product", + "products": [ + { + "crsCode": f"urn:ogc:def:crs:EPSG::{epsg_code}", + "productType": PRODUCT_TYPE, + "radiometricProcessing": RADIOMETRIC_PROCESSING, + "aoi": shpg.mapping(roi), + "id": 
pid, + "imageFormat": IMAGE_FORMAT, + } + for pid in product_ids + ], + } + return headers, payload + + def get_price(self, product_ids: Sequence[str], roi: BaseGeometry) -> Dict[str, Any]: + headers, payload = self._get_order_params(product_ids, roi) + + response = self._post(self.price_url, headers=headers, json=payload) + return response + + def place_order(self, product_ids: Sequence[str], roi: BaseGeometry) -> Dict[str, Any]: + headers, payload = self._get_order_params(product_ids, roi) + + response = self._post(self.order_url, headers=headers, json=payload) + return response + + def get_order_by_id(self, order_id: str) -> Dict[str, Any]: + headers = {"Authorization": f"Bearer {self.token}"} + return self._get(f"{self.order_url}/{order_id}", headers=headers) + + def block_until_order_delivered(self, order_id: str) -> Dict[str, Any]: + start = time.time() + order = self.get_order_by_id(order_id) + while order["status"] != OrderStatus.delivered: + try: + OrderStatus(order["status"]) + except ValueError: + raise ValueError( + f"Received unexpected status {order['status']} from order {order_id}" + ) + waiting_time = time.time() - start + if waiting_time > self.timeout: + raise RuntimeError( + f"Timed out after {waiting_time:.1f}s waiting for order {order_id}" + ) + time.sleep(self.delay) + order = self.get_order_by_id(order_id) + return order + + def download_product(self, product_id: Sequence[str], out_dir: str) -> str: + headers = {"Authorization": f"Bearer {self.token}"} + + download_url = f"{self.item_url}/{product_id}/download" + zip_path = os.path.join(out_dir, f"{product_id}.zip") + + download_file(download_url, zip_path, headers=headers) + with ZipFile(zip_path) as zf: + zip_member = [f for f in zf.filelist if f.filename.endswith(".JP2")][0] + # Trick to extract file without the whole directory tree + # https://stackoverflow.com/questions/4917284/ + zip_member.filename = os.path.basename(zip_member.filename) + filepath = zf.extract(zip_member, 
path=out_dir) + + return filepath diff --git a/src/vibe_lib/vibe_lib/archive.py b/src/vibe_lib/vibe_lib/archive.py new file mode 100644 index 00000000..d984f82c --- /dev/null +++ b/src/vibe_lib/vibe_lib/archive.py @@ -0,0 +1,28 @@ +import os +import shutil + + +def create_flat_archive(directory_path: str, archive_name: str) -> str: + """Create a flat file directory zip archive containing all files under the given directory. + Traverses subdirectories to find all files. + + Args: + directory_path: directory to archive + archive_name: name to give the archive (without .zip extension) + + Returns: + Path to zipped archive containing all files at the root level + """ + files_to_move = [] + for dirpath, _, filenames in os.walk(directory_path): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + files_to_move.append(filepath) + + archive_dir = os.path.join(directory_path, archive_name) + os.mkdir(archive_dir) + for file in files_to_move: + shutil.move(file, archive_dir) + + archive_path = os.path.join(directory_path, archive_name) + return shutil.make_archive(archive_path, "zip", archive_dir) diff --git a/src/vibe_lib/vibe_lib/bing_maps.py b/src/vibe_lib/vibe_lib/bing_maps.py new file mode 100644 index 00000000..d23863f3 --- /dev/null +++ b/src/vibe_lib/vibe_lib/bing_maps.py @@ -0,0 +1,229 @@ +""" +BingMaps API interface and auxiliary method to query tiles, download basemaps, +and manipulate between lat-lon coordinates and tile x-y coordinates. 
Part of the code +is adapted from the following source: +https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system +""" + +import logging +from datetime import datetime +from typing import Dict, List, Tuple, cast + +import numpy as np +import requests +import shapely.geometry as shpg +from pystac.item import Item + +from vibe_core.data import BBox + +MIN_LATITUDE = -85.05112878 +MAX_LATITUDE = 85.05112878 +MIN_LONGITUDE = -180 +MAX_LONGITUDE = 180 +MIN_ZOOM_LEVEL = 1 +MAX_ZOOM_LEVEL = 20 +NO_TILE_AVALABILITY_KEY, NO_TILE_AVAILABILITY_VALUE = "X-VE-Tile-Info", "no-tile" +LOGGER = logging.getLogger(__name__) + + +def tile_xy_from_latlon(lat: float, lon: float, zoom_level: int) -> Tuple[int, int]: + """ + Get the tile x-y coordinates given a lat/lon pair and a zoom level. + """ + # Clip lat/lon to the valid range + lat = min(max(lat, MIN_LATITUDE), MAX_LATITUDE) + lon = min(max(lon, MIN_LONGITUDE), MAX_LONGITUDE) + + # Compute the world map size in pixels for a zoom level + map_size = 256 * (2**zoom_level) + + # Calculate x-y coordinates from the lat/lon (x-y are float values + # representing positions as ratio of the map size) + x = (lon + 180) / 360 + sin_lat = np.sin(lat * np.pi / 180) + y = 0.5 - np.log((1 + sin_lat) / (1 - sin_lat)) / (4 * np.pi) + + # Transform x-y coordinates to pixel positions and clip to a valid range + pixel_x = min(max(x * map_size, 0), map_size - 1) + pixel_y = min(max(y * map_size, 0), map_size - 1) + + # As each tile is 256x256 pixels, get tile x-y coordinates from pixel coordinates + tile_x = int(np.floor(pixel_x / 256)) + tile_y = int(np.floor(pixel_y / 256)) + + return tile_x, tile_y + + +def latlon_from_tile_xy(tile_x: int, tile_y: int, zoom_level: int) -> Tuple[float, float]: + """ + Given a tile x-y coordinates and a zoom level, return the lat/lon pair of the + tile's upper-left corner. 
+ """ + + # Compute the world map size in pixels for a zoom level + map_size = 256 * (2**zoom_level) + + # Get upper-left corner pixel coordinates for the tile + pixel_x = tile_x * 256 + pixel_y = tile_y * 256 + + # Calculate x-y coordinates from pixel coordinates (x-y are float values + # representing positions as ratio of the map size) + x = min(max(pixel_x, 0), map_size - 1) / map_size - 0.5 + y = 0.5 - min(max(pixel_y, 0), map_size - 1) / map_size + + # Convert x-y coordinates to lat/lon + lat = 90 - 360 * np.arctan(np.exp(-y * 2 * np.pi)) / np.pi + lon = 360 * x + + return lat, lon + + +def tiles_from_bbox(bbox: BBox, zoom_level: int) -> List[Tuple[int, int]]: + """ + Get a list of tile x-y coordinates for all tiles covering the given bounding box + for a given zoom level. + """ + lon_bottom_left, lat_bottom_left, lon_top_right, lat_top_right = bbox + + # Get tile x-y coordinates for the bottom-left and top-right corners of the bbox + tile_x_bottom_left, tile_y_bottom_left = tile_xy_from_latlon( + lat_bottom_left, lon_bottom_left, zoom_level + ) + + # Do the same for the top-right corner of the bbox + tile_x_top_right, tile_y_top_right = tile_xy_from_latlon( + lat_top_right, lon_top_right, zoom_level + ) + + tiles = [ + (tile_x, tile_y) + for tile_x in range(tile_x_bottom_left, tile_x_top_right + 1) + for tile_y in range( + tile_y_top_right, tile_y_bottom_left + 1 + ) # top-right to bottom-left instead because y-axis is inverted + ] + return tiles + + +def quadkey_from_tile_xy(tile_x: int, tile_y: int, zoom_level: int) -> str: + """ + Build the quadkey string that uniquely identifies a tile with x-y coordinates + for a given zoom level. 
+ + For more information, please refer to the 'Tile Coordinates and Quadkeys' section of + https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system + """ + quadkey = "" + for i in range(zoom_level, 0, -1): + digit = 0 + mask = 1 << (i - 1) + if tile_x & mask: + digit += 1 + if tile_y & mask: + digit += 2 + quadkey += str(digit) + return quadkey + + +def get_geometry_for_tile(tile_x: int, tile_y: int, zoom_level: int) -> shpg.Polygon: + """ + Get the geometry of the tile with x-y coordinates for a given zoom level. + """ + # Max lat, min lon because it is the upper-left corner of the tile + max_lat, min_lon = latlon_from_tile_xy(tile_x, tile_y, zoom_level) + # Min lat, max lon because it is the bottom-right corner of the tile + # (computed as the upper-left of x+1, y+1) + min_lat, max_lon = latlon_from_tile_xy(tile_x + 1, tile_y + 1, zoom_level) + bbox = shpg.box(min_lon, min_lat, max_lon, max_lat) + return bbox + + +def tile_is_available(url: str) -> bool: + """ + Make a request to BingMaps API to verify if tile represented by url is available for download. + """ + with requests.get(url, stream=True) as r: + try: + r.raise_for_status() + headers = cast(Dict[str, str], r.headers) + return (NO_TILE_AVALABILITY_KEY not in headers) or ( + headers[NO_TILE_AVALABILITY_KEY] != NO_TILE_AVAILABILITY_VALUE + ) + except requests.HTTPError: + error_details = r.json()["errorDetails"] + raise ValueError("Error when verifying tile availablity: " + "\n".join(error_details)) + + +class BingMapsCollection: + """ + BingMaps collection interface to query tiles and download basemaps. 
+ Reference: https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system + """ + + METADATA_URL: str = ( + "http://dev.virtualearth.net/REST/V1/Imagery/Metadata/Aerial" + "?output=json&include=ImageryProviders&key={BING_MAPS_API_KEY}" + ) + + def __init__(self, api_key: str): + if not api_key: + raise ValueError("No API key provided.") + self.api_key = api_key + self.tile_download_url, self.subdomains = self.get_download_url_and_subdomains() + + def get_download_url_and_subdomains(self) -> Tuple[str, List[str]]: + """Fetch the download URL and subdomains using BingMaps API.""" + try: + with requests.get(self.METADATA_URL.format(BING_MAPS_API_KEY=self.api_key)) as r: + r.raise_for_status() + metadata = r.json() + url = metadata["resourceSets"][0]["resources"][0]["imageUrl"] + subdomains = metadata["resourceSets"][0]["resources"][0]["imageUrlSubdomains"] + return url, subdomains + except (requests.HTTPError, requests.ConnectionError) as e: + raise ValueError("Error when retrieving Bing Maps metadata.") from e + + def query_tiles(self, roi: BBox, zoom_level: int) -> List[Item]: + """Query the collection for tiles that intersect with the given bounding box.""" + tiles = tiles_from_bbox(roi, zoom_level) + + items = [] + for subdomain_idx, tile in enumerate(tiles): + tile_x, tile_y = tile + subdomain = self.subdomains[subdomain_idx % len(self.subdomains)] + + quadkey = quadkey_from_tile_xy(tile_x, tile_y, zoom_level) + url = self.tile_download_url.format( + quadkey=quadkey, + api_key=self.api_key, + subdomain=subdomain, + ) + if tile_is_available(url): + geometry = get_geometry_for_tile(tile_x, tile_y, zoom_level) + item = Item( + id=quadkey, + geometry=shpg.mapping(geometry), + bbox=list(geometry.bounds), + datetime=datetime.now(), + properties={"url": url}, + ) + items.append(item) + else: + LOGGER.info( + f"Tile {quadkey} (x {tile_x}, y {tile_y}, ZL {zoom_level}) " + "is not available for download. Skipping it." 
+ ) + return items + + def download_tile(self, url: str, out_path: str): + """Download a tile from the given URL.""" + with requests.get(url, stream=True) as r: + try: + r.raise_for_status() + with open(out_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + except requests.HTTPError: + error_details = r.json()["errorDetails"] + raise ValueError("Error when downloading basemap: " + "\n".join(error_details)) diff --git a/src/vibe_lib/vibe_lib/climatology_lab.py b/src/vibe_lib/vibe_lib/climatology_lab.py new file mode 100644 index 00000000..9e8d2f17 --- /dev/null +++ b/src/vibe_lib/vibe_lib/climatology_lab.py @@ -0,0 +1,93 @@ +import hashlib +from datetime import datetime +from typing import List, Tuple + +import shapely.geometry as shpg +from pystac.item import Item + +from vibe_core.file_downloader import verify_url + + +class ClimatologyLabCollection: + asset_keys: List[str] + download_url: str + geometry_box: Tuple[float, float, float, float] + + def check_url_variable_year(self, variable: str, year: int) -> bool: + url = self.download_url.format(variable, year) + return verify_url(url) + + def query(self, variable: str, time_range: Tuple[datetime, datetime]) -> List[Item]: + start_date, end_date = time_range + year_range = range(start_date.year, end_date.year + 1) + + items = [ + self._create_item(variable, year) + for year in year_range + if self.check_url_variable_year(variable, year) + ] + return items + + def _create_item(self, variable: str, year: int) -> Item: + url = self.download_url.format(variable, year) + + item = Item( + id=hashlib.sha256(f"{variable}_{year}".encode()).hexdigest(), + geometry=shpg.mapping(shpg.box(*self.geometry_box)), + bbox=self.geometry_box, # type: ignore + datetime=datetime(year, 1, 1), + properties={"variable": variable, "url": url}, + ) + + return item + + +class TerraClimateCollection(ClimatologyLabCollection): + asset_keys: List[str] = [ + "aet", + "def", + "pet", + "ppt", + "q", + "soil", + 
"srad", + "swe", + "tmax", + "tmin", + "vap", + "ws", + "vpd", + "PDSI", + ] + + download_url = "https://climate.northwestknowledge.net/TERRACLIMATE-DATA/TerraClimate_{}_{}.nc" + geometry_box = (-180, -90, 180, 90) + + +class GridMETCollection(ClimatologyLabCollection): + asset_keys: List[str] = [ + "bi", + "erc", + "etr", + "fm1000", + "fm100", + "pet", + "pr", + "rmax", + "rmin", + "sph", + "srad", + "th", + "tmmn", + "tmmx", + "vpd", + "vs", + ] + + download_url = "https://www.northwestknowledge.net/metdata/data/{}_{}.nc" + geometry_box = ( + -124.76666663333334, + 25.066666666666666, + -67.05833330000002, + 49.400000000000006, + ) # Geometry for contiguous US (from gridMET products) diff --git a/src/vibe_lib/vibe_lib/comet_farm/__init__.py b/src/vibe_lib/vibe_lib/comet_farm/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_model.py b/src/vibe_lib/vibe_lib/comet_farm/comet_model.py new file mode 100644 index 00000000..2a51400e --- /dev/null +++ b/src/vibe_lib/vibe_lib/comet_farm/comet_model.py @@ -0,0 +1,157 @@ +from typing import Any, Dict, List, Union + +from pydantic import BaseModel, Field + + +class MapUnit(BaseModel): + id: str = Field(alias="@id") + area: Union[None, str] = Field(alias="@area") + year: Union[None, str] = Field(alias="Year") + inputCrop: Union[None, str] = Field(alias="InputCrop") + irrigated: Union[None, str] = Field(alias="Irrigated") + agcprd: Union[None, str] + abgdefac: Union[None, str] + accrste_1_: Union[None, str] + crpval: Union[None, str] + rain: Union[None, str] + cgrain: Union[None, str] + cinput: Union[None, str] + eupacc_1_: Union[None, str] + fertot_1_1_: Union[None, str] + fertac_1_: Union[None, str] + irrtot: Union[None, str] + metabe_1_1_: Union[None, str] + metabe_2_1_: Union[None, str] + nfixac: Union[None, str] + omadae_1_: Union[None, str] + petann: Union[None, str] + stdede_1_: Union[None, str] + struce_1_1_: Union[None, str] + struce_2_1_: Union[None, str] 
+ tnetmn_1_: Union[None, str] + tminrl_1_: Union[None, str] + gromin_1_: Union[None, str] + somse_1_: Union[None, str] + somsc: Union[None, str] + strmac_2_: Union[None, str] + volpac: Union[None, str] + aagdefac: Union[None, str] + accrst: Union[None, str] + aglivc: Union[None, str] + bgdefac: Union[None, str] + bglivcm: Union[None, str] + crmvst: Union[None, str] + crootc: Union[None, str] + fbrchc: Union[None, str] + frootcm: Union[None, str] + metabc_1_: Union[None, str] + metabc_2_: Union[None, str] + omadac: Union[None, str] + rlwodc: Union[None, str] + stdedc: Union[None, str] + strmac_1_: Union[None, str] + strmac_6_: Union[None, str] + strucc_1_: Union[None, str] + n2oflux: Union[None, str] + annppt: Union[None, str] + noflux: Union[None, str] + + class Config: + allow_population_by_field_name = True + + +class CarbonResponse(BaseModel): + soilCarbon: str = Field(alias="SoilCarbon") + biomassBurningCarbon: str = Field(alias="BiomassBurningCarbon") + soilCarbonStock2000: str = Field(alias="SoilCarbonStock2000") + soilCarbonStockBegin: str = Field(alias="SoilCarbonStockBegin") + soilCarbonStockEnd: str = Field(alias="SoilCarbonStockEnd") + + class Config: + allow_population_by_field_name = True + + +class Co2Response(BaseModel): + limingCO2: str = Field(alias="LimingCO2") + ureaFertilizationCO2: str = Field(alias="UreaFertilizationCO2") + drainedOrganicSoilsCO2: str = Field(alias="DrainedOrganicSoilsCO2") + + class Config: + allow_population_by_field_name = True + + +class N2OResponse(BaseModel): + soilN2O: str = Field(alias="SoilN2O") + soilN2O_Direct: str = Field(alias="SoilN2O_Direct") + soilN2O_Indirect_Volatilization: str = Field(alias="SoilN2O_Indirect_Volatilization") + soilN2O_Indirect_Leaching: str = Field(alias="SoilN2O_Indirect_Leaching") + wetlandRiceCultivationN2O: str = Field(alias="WetlandRiceCultivationN2O") + biomassBurningN2O: str = Field(alias="BiomassBurningN2O") + drainedOrganicSoilsN2O: str = Field(alias="DrainedOrganicSoilsN2O") + + 
class CometOutput(BaseModel):
    """Result of one named scenario, grouped into carbon, CO2, N2O and CH4 sections."""

    name: str = Field(alias="@name")
    carbon: CarbonResponse = Field(alias="Carbon")
    co2: Co2Response = Field(alias="CO2")
    # NOTE(review): the field is spelled "n20" (digit zero) while its alias is
    # "N2O"; renaming it would change the public attribute name, so it is kept.
    n20: N2OResponse = Field(alias="N2O")
    ch4: CH4Response = Field(alias="CH4")

    class Config:
        # Let callers populate the model with the Python field names as well
        # as with the aliases.
        allow_population_by_field_name = True
class CometRequester:
    """Submit a request to the COMET-Farm API and extract the soil-carbon offset.

    The request is sent by a ``CometHTTPServer`` thread, which also receives
    the webhook callback and hands the raw body back through a queue.
    """

    def __init__(self, comet_request: CometServerParameters):
        self.comet_request = comet_request

    def get_comet_raw_output(self, queue: "Queue[str]") -> str:
        """Block until the server thread delivers a payload and return it.

        Raises ``queue.Empty`` if nothing arrives before the timeout.
        """
        # NOTE(review): the constant is named "..._IN_SECONDS" but is multiplied
        # by 60 here, so the effective wait is that many minutes. Confirm which
        # unit was intended before changing either side.
        return queue.get(timeout=TIMEOUT_IN_SECONDS * 60)

    def parse_comet_response(self, raw_comet_response: str) -> Dict[str, Any]:
        """Parse the XML payload into plain, JSON-compatible dictionaries."""
        comet_xml = xmltodict.parse(raw_comet_response)
        # Round-trip through JSON so the result contains only builtin types.
        comet_json = json.loads(json.dumps(comet_xml))
        return comet_json

    def run_comet_request(self, request_str: str) -> str:
        """Run one request end to end and return the carbon offset as text.

        Returns the ``soilCarbon`` value of the first ``CometOutput`` scenario
        whose name contains "scenario", suffixed with " Mg Co2e/year".

        Raises:
            RuntimeError: on any failure (timeout, malformed response, missing
                scenario). The raw response is included in the message and the
                original error is chained as ``__cause__``.
        """
        queue: "Queue[str]" = Queue()
        server = CometHTTPServer(queue, self.comet_request, request_str)
        comet_response = ""
        try:
            server.start()
            comet_response = self.get_comet_raw_output(queue)
            comet_json = self.parse_comet_response(comet_response)

            carbon_offset: Optional[str] = None
            # deriving the carbon offset
            parsed = CometResponse(**comet_json)
            for scenario in parsed.day.cropland.modelRun.scenario:
                # The scenario list mixes ScenarioMapUnit and CometOutput
                # entries; only the latter carries the carbon figures.
                if isinstance(scenario, CometOutput) and "scenario" in scenario.name:
                    carbon_offset = scenario.carbon.soilCarbon + " Mg Co2e/year"
                    break

            if carbon_offset is None:
                raise RuntimeError("Missing carbon offset from COMET-Farm API")

            return carbon_offset
        except Exception as err:
            raise RuntimeError(
                f"Error when building comet response. Comet Response: {comet_response}"
            ) from err
        finally:
            server.shutdown()
            try:
                ngrok.kill()
            except Exception:
                # Deliberately best-effort: a failure to kill the ngrok process
                # must not mask the result or the error raised above.
                pass
class CometHTTPRequestHandler(BaseHTTPRequestHandler):
    """Webhook endpoint that forwards every POST body to ``outqueue``.

    GET requests are answered with a plain "OK" so the endpoint can be probed.
    """

    def __init__(self, outqueue: "Queue[str]", *args: Any, **kwargs: Any):
        # Attributes must be set before the base initializer runs, because the
        # base class handles the request from inside ``__init__``.
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        self.outqueue = outqueue
        super().__init__(*args, **kwargs)

    def _send_ok(self):
        """Reply ``200 OK`` with a plain-text body."""
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-Type", "text/plain")
        self.end_headers()
        self.wfile.write(b"OK")

    def do_POST(self):
        """Read the request body, enqueue it as UTF-8 text and acknowledge.

        Replies 411 when ``Content-Length`` is missing and 400 when it is not
        a non-negative decimal integer; nothing is enqueued in those cases.
        """
        raw_length = self.headers.get("Content-Length")
        if raw_length is None:
            self.send_error(HTTPStatus.LENGTH_REQUIRED, "Missing Content-Length header")
            return
        try:
            # Content-Length is a decimal number; base-0 parsing would reject
            # leading zeros and accept hex/octal/binary prefixes.
            content_len = int(raw_length)
        except ValueError:
            content_len = -1
        if content_len < 0:
            # A negative size would make ``read`` block until the peer closes.
            self.send_error(HTTPStatus.BAD_REQUEST, "Invalid Content-Length header")
            return

        post_body = self.rfile.read(content_len).decode("utf-8")
        self.logger.info(f"Received data {post_body} from COMET-Farm API")
        self.outqueue.put(post_body)
        self._send_ok()

    def do_GET(self):
        """Answer any GET with ``200 OK``."""
        self._send_ok()
class Encoder(nn.Sequential):
    """Embedding, positional encoding and a stack of ``EncoderLayer`` blocks.

    Args:
        in_features: Size of the last dimension of the input.
        num_layers: Number of encoder layers.
        d_model: Width of the embedding and of every encoder layer.
        num_heads: Attention heads per layer.
        d_ff: Hidden width of the feed-forward network in each layer.
        max_seq_len: Longest sequence the positional encoding is built for.
        dropout: Dropout rate used after the embedding and inside the layers.
    """

    def __init__(
        self,
        in_features: int,
        num_layers: int,
        d_model: int,
        num_heads: int,
        d_ff: int,
        max_seq_len: int,
        dropout: float = 0.1,
    ):
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = nn.Sequential(nn.Linear(in_features, self.d_model), nn.ReLU())
        # Register the encoding as a buffer so it follows the module through
        # ``.to(device)`` / ``.double()``. ``persistent=False`` keeps it out of
        # the state_dict, so existing checkpoints load unchanged.
        self.register_buffer(
            "pos_encoding", positional_encoding(max_seq_len, d_model), persistent=False
        )

        self.enc_layers = nn.ModuleList(
            [EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x: Tensor, mask: Optional[Tensor] = None):
        """Encode ``x`` (batch_size, input_seq_len, in_features).

        ``mask`` is handed unchanged to every encoder layer. Returns a tensor
        of shape (batch_size, input_seq_len, d_model).
        """
        seq_len = x.size(1)

        # adding embedding and position encoding.
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x = x * self.d_model**0.5
        x = x + self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x)

        for enc_layer in self.enc_layers:
            x = enc_layer(x, mask)

        return x  # (batch_size, input_seq_len, d_model)
class LocallyConnected1d(nn.Module):
    """1-D locally connected layer: a convolution whose kernel is not shared
    across output positions.

    Input is (batch, in_channels, seq_len); output is
    (batch, out_channels, out_seq_len), where
    ``out_seq_len = (seq_len + pad_left + pad_right - kernel_size) // stride + 1``.

    Args:
        in_channels: Channels of the input.
        out_channels: Channels of the output.
        seq_len: Length of the (unpadded) input sequence; fixes the weight shape.
        kernel_size: Window length.
        stride: Step between consecutive windows.
        padding: Padding on both sides (int) or ``(left, right)``.
        bias: Whether to learn a per-channel, per-position bias.
        device: Device on which the parameters are created.
        dtype: Dtype of the parameters.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        seq_len: int,
        kernel_size: int,
        stride: int = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        bias: bool = True,
        device: Optional[str] = None,
        dtype: Optional[_dtype] = None,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.seq_len = seq_len
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = (padding, padding) if isinstance(padding, int) else padding
        out_seq_len = (seq_len + sum(self.padding) - (kernel_size - 1) - 1) // stride + 1
        # One kernel per output position, hence the trailing out_seq_len axis.
        self.weight = Parameter(
            torch.empty(
                (in_channels, out_channels, kernel_size, out_seq_len),  # type: ignore
                device=device,
                dtype=dtype,  # type: ignore
            )
        )

        if bias:
            # Create the bias on the same device / dtype as the weight.
            self.bias = Parameter(
                torch.empty(
                    (out_channels, out_seq_len),
                    device=device,
                    dtype=dtype,  # type: ignore
                )
            )
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight and bias from a normal distribution with std 0.1."""
        # Do normal initialization for now, but can use something smarter
        nn.init.normal_(self.weight, std=0.1)
        if self.bias is not None:
            nn.init.normal_(self.bias, std=0.1)

    def forward(self, x: torch.Tensor):
        """Apply the layer to ``x`` of shape (batch, in_channels, seq_len)."""
        x = F.pad(x, self.padding)
        # (batch, in_channels, out_seq_len, kernel_size): one window per position
        x = x.unfold(-1, self.kernel_size, self.stride)
        x = torch.einsum("b i l k, i o k l -> bol", x, self.weight)
        if self.bias is not None:
            x = x + self.bias
        return x
seq_len=sequence_length, + kernel_size=kernel_size, + ), + nn.BatchNorm1d(first_encoder_channels), + Rearrange("b d l -> b l d"), + Encoder( + in_features=first_encoder_channels, + num_layers=encoder_layers, + d_model=encoder_features, + num_heads=encoder_heads, + d_ff=encoder_ff_features, + max_seq_len=out_seq_len, + dropout=encoder_dropout, + ), + nn.Flatten(), + ) + ] + ) + + re1, re2, re3 = rest_encoder_channels + for _ in range(num_inputs - 1): + self.encoders.append( + nn.Sequential( + Rearrange("b l d -> b d l"), + LocallyConnected1d( + in_channels=rest_channels, + out_channels=re1, + seq_len=sequence_length, + kernel_size=kernel_size, + ), + nn.ReLU(), + nn.BatchNorm1d(re1), + LocallyConnected1d( + in_channels=re1, + out_channels=re2, + seq_len=out_seq_len, + kernel_size=kernel_size, + ), + nn.ReLU(), + nn.BatchNorm1d(re2), + Rearrange("b d l -> b l d"), + MyLSTM( + input_size=re2, + hidden_size=re3, + num_layers=1, + batch_first=batch_first, + dropout=dropout, + ), + # nn.ReLU(), # Do ReLU outside the model + ) + ) + + dec_input_features = out_seq_len * encoder_features + (self.num_inputs - 1) * re3 + df1, df2 = decoder_features + self.decoder = nn.Sequential( + nn.BatchNorm1d(dec_input_features), + Rearrange("b d -> b 1 d"), + MyLSTM( + input_size=dec_input_features, + hidden_size=df1, + batch_first=batch_first, + dropout=dropout, + ), + Rearrange("b 1 d -> b d"), + nn.ReLU(), + nn.BatchNorm1d(df1), + nn.Linear(df1, df2), + nn.ReLU(), + nn.Linear(df2, 1), + ) + + def forward(self, x: Union[torch.Tensor, List[torch.Tensor]]): + sliced_encoders = nn.ModuleList(list(self.encoders)[1:]) + x = [self.encoders[0](x[0])] + [ + F.relu(encoder(xi)[:, -1]) for encoder, xi in zip(sliced_encoders, x[1:]) + ] + x = torch.cat(x, dim=1) + x = self.decoder(x) + return x diff --git a/src/vibe_lib/vibe_lib/deepmc/time.py b/src/vibe_lib/vibe_lib/deepmc/time.py new file mode 100644 index 00000000..3183c1f0 --- /dev/null +++ b/src/vibe_lib/vibe_lib/deepmc/time.py @@ -0,0 +1,25 
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    Args:
        d_model: Width of the inputs and of the output.
        num_heads: Number of heads; must divide ``d_model``.

    Raises:
        ValueError: if ``d_model`` is not a multiple of ``num_heads``.
    """

    def __init__(self, d_model: int, num_heads: int):
        super().__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        if d_model % self.num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )

        self.d_head = d_model // self.num_heads
        self.scale = self.d_head**-0.5

        self.wq = nn.Linear(d_model, d_model)
        self.wk = nn.Linear(d_model, d_model)
        self.wv = nn.Linear(d_model, d_model)

        self.dense = nn.Linear(d_model, d_model)

    def forward(self, v: Tensor, k: Tensor, q: Tensor, mask: Tensor):
        """Attend from ``q`` over ``k`` / ``v``; note the (v, k, q) argument order.

        All three inputs are (batch_size, seq_len, d_model). ``mask`` is passed
        straight to ``attn``, which skips masking when it is ``None``.
        NOTE(review): ``attn`` applies the mask to scores of shape
        (batch_size * num_heads, seq_len_q, seq_len_k), so a mask presumably has
        to broadcast against that shape - confirm with callers.

        Returns a tensor of shape (batch_size, seq_len_q, d_model).
        """
        # (batch_size, seq_len, d_model)
        # Each input gets its own projection. Previously wq was applied to all
        # three, which left wk and wv untrained and unused; models trained
        # before this fix need retraining to produce meaningful outputs.
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        # Fold the heads into the batch axis: (batch_size * num_heads, seq_len, d_head)
        q, k, v = (rearrange(x, "b l (h d) -> (b h) l d", h=self.num_heads) for x in (q, k, v))

        # Out-of-place so the projected tensor is not modified under autograd.
        q = q * self.scale
        # (batch_size * num_heads, seq_len_q, d_head)
        scaled_attention = attn(q, k, v, mask)

        # Unfold the heads again: (batch_size, seq_len_q, d_model)
        concat_attention = rearrange(scaled_attention, "(b h) l d -> b l (h d)", h=self.num_heads)

        output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)

        return output
Optional[BaseGeometry], + bbox: Optional[BBox], + time_range: Optional[Tuple[datetime, datetime]], + id: Optional[str], + ): + """ + Build query parameters + """ + # Format time range + payload: Dict[str, Any] = { + "provider": self.provider, + "concept_id": self.concept_ids[self.processing_level], + "page_size": self.page_size, + } + if time_range is not None: + fmt_tr = ",".join( + (t.astimezone().isoformat().replace("+00:00", "Z") for t in time_range) + ) + payload["temporal"] = fmt_tr + # Format spatial query + if geometry is not None: + assert isinstance(geometry, (shpg.Polygon, shpg.MultiPolygon)) + # Set option to get data that intersects with any of the geometries + payload.update({"polygon[]": format_geometry(geometry), "options[polygon][or]": "true"}) + if bbox is not None: + payload["bounding_box"] = ",".join(str(i) for i in bbox) + if id is not None: + payload["producer_granule_id"] = id + return payload + + def query( + self, + *, + geometry: Optional[BaseGeometry] = None, + bbox: Optional[BBox] = None, + time_range: Optional[Tuple[datetime, datetime]] = None, + id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + items = [] + max_pages = math.ceil(self.max_items / self.page_size) + # Go to max_pages + 1 in case we have the maximum number of items possible + # In practice we'll accept up to page_size - 1 extra items + for page_num in range(1, max_pages + 2): + payload = self._get_payload(geometry=geometry, bbox=bbox, time_range=time_range, id=id) + payload["pageNum"] = page_num + response = requests.post(self.url, data=payload) + try: + response.raise_for_status() + except HTTPError as e: + error_message = response.text + msg = f"{e}. 
def low_rank_precision(
    cov: NDArray[Any], thr: float
) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]:
    """
    Compute the pseudo-inverse of a low-rank approximation of a covariance matrix.

    The approximation keeps the largest eigenvalues for as long as their
    cumulative share of the total energy (sum of eigenvalues) stays strictly
    below ``thr``; the remaining eigen-directions are dropped.

    Returns ``(precision, w, mask)``: the pseudo-inverse, the eigenvalues in
    ascending order, and a boolean mask marking the eigenvalues that were kept.

    NOTE(review): when the largest eigenvalue alone already reaches ``thr``,
    nothing is kept and the returned precision is all zeros.
    """
    # Columns of v are the eigenvectors; w is sorted in ascending order.
    w, v = np.linalg.eigh(cov)
    mask = np.cumsum(w[::-1] / w.sum())[::-1] < thr
    # Invert only the kept eigenvalues so discarded (possibly zero) ones never
    # trigger a division by zero.
    wi = np.zeros_like(w)
    wi[mask] = 1 / w[mask]
    # V @ diag(wi) @ V.T: scale the eigenvector *columns* by wi. Scaling the
    # rows of v instead cancels the eigenvectors and yields diag(wi).
    precision = (v * wi) @ v.T
    return precision, w, mask
def train_mixture_with_component_search(
    x: NDArray[Any], max_components: int = 10, thr: float = 0.2
) -> GaussianMixture:
    """
    Fit mixtures with 1, 2, ... components and return the last one that was
    accepted. A larger mixture is accepted only if it raises the mean
    log-likelihood by at least ``|thr * L1|``, where ``L1`` is the mean
    log-likelihood of the single-component fit; the search stops at the first
    rejection or once ``max_components`` has been reached.
    """
    selected = GaussianMixture(n_components=1).fit(x)
    single_ll = mixture_log_likelihood(x, selected).mean()
    selected_ll = single_ll

    for n_components in range(2, max_components + 1):
        candidate = GaussianMixture(n_components=n_components).fit(x)
        candidate_ll = mixture_log_likelihood(x, candidate).mean()
        gain = candidate_ll - selected_ll
        # Improvement too small: keep the previously accepted mixture.
        if gain < np.abs(thr * single_ll):
            break
        selected, selected_ll = candidate, candidate_ll

    return selected
def geojson_to_wkt(json: Dict[str, Any]) -> List[str]:
    """
    Recursively extract WKT strings from a GeoJSON Feature or FeatureCollection.

    Args:
        json: Parsed GeoJSON object.

    Returns:
        One WKT string per feature found. An object without a "type" key, or a
        FeatureCollection without features, yields an empty list.

    Raises:
        ValueError: If "type" is neither a Feature nor a FeatureCollection
            (the comparison is case-insensitive).
    """
    if "type" not in json:
        return []

    kind = json["type"].lower()

    if kind == FEATURE:
        return [shpg.shape(json["geometry"]).wkt]

    if kind == FEATURE_COLLECTION:
        # The explicit [] initializer keeps reduce from raising TypeError
        # when the collection has no features
        return reduce(add, [geojson_to_wkt(f) for f in json["features"]], [])

    raise ValueError("Unable to parse GeoJSON input")
def blob_url_from_offset(publish_date: datetime, offset: int) -> str:
    """
    Build the blob path of a GFS forecast file inside the NOAA container.

    The path is made of the publish date (YYYYMMDD), the two-digit publish hour and
    the forecast offset left-padded with zeros to three characters.
    """
    day = publish_date.date()
    date_str = f"{day.year:04d}{day.month:02d}{day.day:02d}"
    hour_str = f"{publish_date.hour:02d}"
    offset_str = str(offset).rjust(3, "0")
    return f"gfs.{date_str}/{hour_str}/atmos/gfs.t{hour_str}z.pgrb2.0p25.f{offset_str}"
def get_tile_geometry(tiles_gdf: gpd.GeoDataFrame, tile_name: str) -> Dict[str, Any]:
    """
    Return the GeoJSON-like mapping of the geometry of the tile named `tile_name`.

    Rows are selected by comparing the "NAME" column with `tile_name`. A RuntimeError
    is raised when the selection is not a GeoDataFrame with a "geometry" column, when
    the geometry column is not iterable, or when the number of selected geometries is
    not exactly one.
    """
    name_matches = tiles_gdf["NAME"] == tile_name
    selected_tile = tiles_gdf[name_matches]

    has_geometry = (
        isinstance(selected_tile, gpd.GeoDataFrame) and "geometry" in selected_tile.columns
    )
    if not has_geometry:
        raise RuntimeError(f"Tile {tile_name} not found in GLAD/Hansen tiles shapefile.")

    selected_geometries = selected_tile["geometry"]

    if not isinstance(selected_geometries, Iterable):
        raise RuntimeError(
            "Failed to load the GLAD/Hansen tiles shapefile. 'geometry' field is not iterable."
        )

    n_found = len(selected_geometries)
    if n_found != 1:
        raise RuntimeError(
            f"Failed to load the GLAD/Hansen tiles shapefile. "
            f"Expected 1 geometry for tile {tile_name}, found {n_found}."
        )

    only_geometry = selected_geometries.iloc[0]
    return shpg.mapping(only_geometry)
def run_cluster_overlap(
    attribute_name: str,
    reduced_samples: GeoDataFrame,
    minimum_sample_polygons: GeoDataFrame,
    geo_locations: GeoDataFrame,
) -> GeoDataFrame:
    """
    Assign to each location the attribute value of the sample that falls in the same polygon.

    Both the samples and the locations are spatially joined with
    `minimum_sample_polygons` (geopandas default join settings), then matched on the
    index of the polygon they were joined to.

    Args:
        attribute_name: Column of `reduced_samples` holding the value to propagate.
        reduced_samples: Sample geometries with the `attribute_name` column.
        minimum_sample_polygons: Polygons used to associate samples and locations.
        geo_locations: Locations that should receive a value.

    Returns:
        GeoDataFrame with columns `attribute_name` and "geometry" (the location
        geometries), created with CRS 4326. Rows with any missing value after the
        merge are dropped.
    """
    # perform spatial join between minimum sample locations and polygons
    df_overlap = gpd.sjoin(reduced_samples, minimum_sample_polygons)
    # rename so the sample columns do not clash with the location columns in the merge
    df_overlap.rename(
        columns={
            "index_right": "index_overlap",
            "geometry": "geometry_overlap",
        },
        inplace=True,
    )
    df_overlap = df_overlap[["index_overlap", f"{attribute_name}", "geometry_overlap"]]
    # perform spatial join between geolocation points and minimum sample polygons
    geo_locations = gpd.sjoin(geo_locations, minimum_sample_polygons)
    geo_locations.rename(
        columns={
            "index_right": "index_geo_locations",
        },
        inplace=True,
    )
    # assign nutrient values to geolocation points
    # (right merge: every joined location is kept, matched by polygon index)
    out = pd.merge(
        df_overlap,
        geo_locations,
        how="right",
        left_on="index_overlap",
        right_on="index_geo_locations",
    )
    # drop rows that have a missing value in any column
    out = out[~out.isna().any(axis=1)]
    out = GeoDataFrame(out[[attribute_name, "geometry"]], geometry="geometry", crs=4326)  # type: ignore
    return out
def run_kriging_model(
    attribute_name: str,
    reduced_samples: GeoDataFrame,
    geo_locations: GeoDataFrame,
) -> GeoDataFrame:
    """
    Interpolate `attribute_name` at every location with ordinary kriging.

    A gaussian variogram is fitted on the sample coordinates and values, and the
    kriging estimate is written to the `attribute_name` column of `geo_locations`.
    Note that `geo_locations` is modified in place and is also the returned object.
    """
    # sample coordinates as an (n_samples, 2) array and the observed values
    sample_points = reduced_samples.geometry
    coordinates = np.array([sample_points.x, sample_points.y]).T
    observed = reduced_samples[attribute_name].values

    # fit the variogram (gaussian model) and build the ordinary kriging estimator
    variogram = skg.Variogram(coordinates, observed, model="gaussian", fit_method="trf")
    kriging = OrdinaryKriging(variogram, min_points=1, max_points=2, mode="exact")

    # estimate the attribute at the target locations
    target_points = geo_locations.geometry
    geo_locations[attribute_name] = kriging.transform(target_points.x, target_points.y)
    return geo_locations
# compute mean and variance in local windows of data in each cluster c weighted by q[c]
def compute_weighted_average_and_variance(
    data: T.Tensor,
    weights: T.Tensor,
    half_side_length: int,
    stride: int = 1,
    var_min: float = 0.0001,
    mq_min: float = 0.000001,
):
    """
    Compute the per-class weighted local mean and variance of `data`.

    Args:
        data: Tensor indexed as (channel, x, y).
        weights: Tensor indexed as (class, x, y) with the weight of each class per pixel.
        half_side_length: Half side of the local window used for averaging.
        stride: Stride used by the local average.
        var_min: Lower bound applied to the returned variance.
        mq_min: Lower bound applied to the local weight normalizer before dividing by it.

    Returns:
        Tuple (mean, var), both indexed as (class, channel, x, y).
    """
    # compute probability normalization constants per class
    mq = compute_local_average(weights, half_side_length, stride)
    # Tensor.clamp is out-of-place, so the result must be kept for mq_min to take effect.
    # An in-place clamp is not used because compute_local_average may return its input.
    mq = mq.clamp(min=mq_min)

    # instantiate data and data**2 weighted by weights[c] for each c
    # future todo: investigate whether replacing einsum by broadcast ops gives a speedup
    weighted = T.einsum("zij,cij->czij", data, weights)  # class,channel,x,y
    weighted_sq = T.einsum("zij,cij->czij", data**2, weights)

    # mean = E_[x~weights[c]] data[x]
    # var = E_x (data[x]^2) - (E_x data[x])^2
    normalizer = mq.unsqueeze(1)
    mean = compute_local_average(weighted, half_side_length, stride) / normalizer
    var = compute_local_average(weighted_sq, half_side_length, stride) / normalizer - mean**2
    var = var.clamp(min=var_min)

    return mean, var
# run EM algorithm for Gaussian mixture
def run_clustering(
    image: NDArray[Any],
    number_classes: int,
    half_side_length: int,
    number_iterations: int,
    stride: int,
    warmup_steps: int,
    warmup_half_side_length: int,
    window: int,
) -> NDArray[Any]:
    """
    Cluster the pixels of an image by running EM for a local Gaussian mixture.

    The image is processed in independent square tiles of side `window`. Each tile
    starts from its own random posterior, so class indices are not guaranteed to be
    consistent across tiles.

    Args:
        image: Array indexed as (channel, x, y).
        number_classes: Number of mixture components (classes).
        half_side_length: Half side of the local window used after the warmup.
        number_iterations: Number of EM iterations per tile.
        stride: Stride used by the local averages.
        warmup_steps: Number of initial iterations that use `warmup_half_side_length`.
        warmup_half_side_length: Half side of the local window during the warmup.
        window: Side of the tiles the image is split into.

    Returns:
        Array of shape (x, y) and dtype uint8 with the most likely class of each pixel.
    """
    _, x_size, y_size = image.shape
    result = np.zeros(shape=(x_size, y_size), dtype="uint8")

    for row in range(math.ceil(x_size / window)):
        xmin = row * window
        xmax = min(xmin + window, x_size)
        for col in range(math.ceil(y_size / window)):
            ymin = col * window
            ymax = min(ymin + window, y_size)

            partial_image = image[:, xmin:xmax, ymin:ymax]

            # Use the module logger (not the root logger) with lazy formatting
            LOGGER.info(
                "Computing clusters for row: %s, col: %s, [%s, %s, %s, %s]",
                row,
                col,
                xmin,
                xmax,
                ymin,
                ymax,
            )

            with T.inference_mode():
                # convert image to Torch object
                data = T.as_tensor(partial_image)

                # randomly initialize posterior matrix
                p = T.rand((number_classes,) + partial_image.shape[1:])
                p /= p.sum(0)

                # EM
                for i in range(number_iterations):
                    p.mean().item()  # trigger synchronization
                    p, _, _, _ = perform_iteration_expectation_maximization(
                        data,
                        p,
                        warmup_half_side_length if i < warmup_steps else half_side_length,
                        stride,
                    )

                result[xmin:xmax, ymin:ymax] = np.argmax(p.numpy(), axis=0)
    return result
get_available_collections() + + if self.collection not in self.available_collections: + message = ( + f"Invalid collection '{self.collection}'. " + f"Available collections: {self.available_collections}" + ) + self.logger.error(message) + raise ValueError(message) + + def query_by_id(self, id: str) -> Item: + items = query_catalog_by_ids([self.collection], [id]) + if not items: + message = f"There is no item with id {id} on collection {self.collection}." + self.logger.error(message) + raise KeyError(message) + return items[0] + + def query( + self, + geometry: Optional[BaseGeometry] = None, + roi: Optional[BBox] = None, + time_range: Optional[Tuple[datetime, datetime]] = None, + ids: Optional[List[str]] = None, + query: Optional[Dict[str, Any]] = None, + ) -> List[Item]: + return query_catalog( + [self.collection], + geometry=geometry, + roi=roi, + time_range=time_range, + ids=ids, + query=query, + ) + + def download_asset(self, asset: Asset, out_path: str) -> str: + """ + Download asset from the planetary computer and save it into the desired path. + If the output path is a directory, try to infer the filename from the asset href. + """ + if os.path.isdir(out_path): + # Resolve name from href + match = re.match(self.filename_regex, asset.href) + if match is None: + raise ValueError(f"Unable to parse filename from asset href: {asset.href}") + filename = match.groups()[0] + out_path = os.path.join(out_path, filename) + for retry in range(MAX_RETRIES): + href = pc.sign(asset.href) + try: + download_file(href, out_path) + return out_path + except RequestException as e: + LOGGER.warning( + f"Exception {e} downloading from {href}." + f" Retrying after {RETRY_WAIT}s ({retry+1}/{MAX_RETRIES})." + ) + time.sleep(RETRY_WAIT) + raise RuntimeError(f"Failed asset {asset.href} after {MAX_RETRIES} retries.") + + def download_item(self, item: Item, out_dir: str): + """ + Download assets from planetary computer. 
class Sentinel2Collection(PlanetaryComputerCollection):
    """
    Planetary computer collection "sentinel-2-l2a".

    The downloaded assets are the spectral bands listed in `asset_keys`.
    """

    collection = "sentinel-2-l2a"
    filename_regex = r".*/(.*\.\w{3,4})(?:\?|$)"
    asset_keys: List[str] = [
        "B01", "B02", "B03", "B04", "B05", "B06",
        "B07", "B08", "B8A", "B09", "B11", "B12",
    ]  # fmt: skip

    def get_cloud_mask(self, item: Item) -> str:
        """
        Return the signed URL of the cloud mask (GML) of the item, resolved
        relative to the href of the "granule-metadata" asset.
        """
        metadata_href = item.assets["granule-metadata"].href
        mask_href = urljoin(metadata_href, "QI_DATA/MSK_CLOUDS_B00.gml")
        return pc.sign(mask_href)
class Modis16DayVICollection(PlanetaryComputerCollection):
    """
    MODIS Vegetation Indices generated every 16 days.
    Pixels are chosen from all acquisitions in the 16-day period.
    Available resolutions are 250m and 500m.
    https://planetarycomputer.microsoft.com/dataset/modis-13Q1-061
    """

    # Maps the supported resolutions to the id of the matching collection
    collections: Dict[int, str] = {250: "modis-13Q1-061", 500: "modis-13A1-061"}

    def __init__(self, resolution: int):
        """
        Select the collection that matches `resolution`.
        Raises ValueError when the resolution is not one of the supported ones.
        """
        supported = self.collections
        if resolution in supported:
            # The collection must be set before the base class validates it
            self.collection = supported[resolution]
            super().__init__()
            return
        raise ValueError(
            f"Expected resolution to be one of {list(self.collections)}, got {resolution}."
        )
def query_catalog(
    collections: List[str],
    geometry: Optional[BaseGeometry] = None,
    roi: Optional[BBox] = None,
    time_range: Optional[Tuple[datetime, datetime]] = None,
    ids: Optional[List[str]] = None,
    query: Optional[Dict[str, Any]] = None,
) -> List[Item]:
    """
    Search the planetary computer catalog and return the matching items.

    Every filter is optional: `geometry` (items intersecting it), `roi` (bounding box),
    `time_range` (start and end, formatted with DATE_FORMAT, so only the date part is
    sent), `ids` and `query` are forwarded to the STAC search as given.
    """
    date_range = None
    if time_range is not None:
        date_range = "/".join(i.strftime(DATE_FORMAT) for i in time_range)

    footprint = None if geometry is None else shpg.mapping(geometry)

    search = Client.open(CATALOG_URL).search(
        collections=collections,
        intersects=footprint,
        bbox=roi,
        datetime=date_range,
        ids=ids,
        query=query,
    )
    return list(search.get_items())
def map_sentinel_product_args(item: Item) -> Dict[str, Any]:
    """
    Build the keyword arguments shared by the Sentinel products from a STAC item.

    The time range starts and ends at the item datetime, the platform is the
    upper-cased "platform" property with the "SENTINEL-" prefix removed, and
    `extra_info` and `assets` start out empty.
    """
    props = item.properties
    return dict(
        geometry=item.geometry,
        time_range=(item.datetime, item.datetime),
        relative_orbit_number=props["sat:relative_orbit"],
        orbit_direction=props["sat:orbit_state"],
        platform=props["platform"].upper().replace("SENTINEL-", ""),
        extra_info={},
        assets=[],
    )
# From example in:
# https://nbviewer.org/github/microsoft/AIforEarthDataSets/blob/main/data/sentinel-1-grd.ipynb
def generate_sentinel1_blob_path(item: Sentinel1Product) -> str:
    """
    Build the blob prefix of a Sentinel-1 GRD scene in the Azure container.

    The prefix is GRD/<year>/<month>/<day>/<mode>/<polarization>/<scene name>, where
    every part is sliced from the scene name and month and day lose their leading zeros.
    """
    scene_name = get_sentinel1_scene_name(item)

    acquisition_year = scene_name[YEAR_SLICE]
    acquisition_month = scene_name[MONTH_SLICE].lstrip("0")
    acquisition_day = scene_name[DAY_SLICE].lstrip("0")
    sensor_mode = scene_name[MODE_SLICE]
    polarization = scene_name[POLARIZATION_SLICE]  # "DV", for example, is "dual VV/VH"

    return (
        f"GRD/{acquisition_year}/{acquisition_month}/{acquisition_day}/"
        f"{sensor_mode}/{polarization}/{scene_name}"
    )
def validate_dem_provider(name: str, resolution: int) -> PlanetaryComputerCollection:
    """
    Instantiate the collection of the DEM provider `name` for the given resolution.

    Raises RuntimeError when the provider is unknown or when it does not offer
    the requested resolution.
    """
    valid_providers = {
        "USGS3DEP": {"class": USGS3DEPCollection, "resolutions": [10, 30]},
        "COPERNICUSDEM30": {"class": CopernicusDEMCollection, "resolutions": [30]},
    }

    if name not in valid_providers:
        raise RuntimeError(
            f"Invalid DEM parameter 'provider': {name}. "
            f"Valid providers are {', '.join(valid_providers.keys())}"
        )

    provider = valid_providers[name]
    if resolution not in provider["resolutions"]:
        raise RuntimeError(
            f"Wrong resolution for dem provider {name}. "
            f"Valid resolution(s) is/are {valid_providers[name]['resolutions']}"
        )

    return provider["class"]()
class FRGBA(NamedTuple):
    """
    Float RGBA

    Declared as its own four-field tuple: subclassing the `FRGB` NamedTuple and
    annotating `alpha` does not add a field, so the alpha channel could neither be
    passed to the constructor nor read back.
    """

    red: float
    green: float
    blue: float
    # Defaults to 1.0 so that three-argument construction keeps working
    alpha: float = 1.0
def load_raster(
    raster: Raster,
    bands: Optional[Sequence[Union[int, str]]] = None,
    use_geometry: bool = False,
    crs: Optional[Any] = None,
    transform: Optional[rasterio.Affine] = None,
    shape: Optional[Tuple[int, int]] = None,
    resampling: Resampling = Resampling.nearest,
) -> xr.DataArray:
    """
    Read the requested bands of a Raster's raster asset.

    Entries of `bands` may be integers (used as band indices as-is) or strings
    (band names, translated to indices through `raster.bands`). When no bands are
    given, no band selection is requested.
    `crs`, `transform`, `shape` and `resampling` are forwarded unchanged to
    `load_raster_from_url`.
    If `use_geometry` is True, the Raster's geometry is forwarded as the clipping
    geometry, tagged with CRS "epsg:4326".
    """
    raster_url = raster.raster_asset.url

    # Translate band names into indices; integers pass through untouched
    band_indices = None
    if bands:
        band_indices = []
        for band in bands:
            band_indices.append(raster.bands[band] if isinstance(band, str) else band)

    geometry, geometry_crs = (raster.geometry, "epsg:4326") if use_geometry else (None, None)

    return load_raster_from_url(
        raster_url,
        band_indices,
        crs=crs,
        transform=transform,
        shape=shape,
        resampling=resampling,
        geometry=geometry,
        geometry_crs=geometry_crs,
    )
def save_raster_to_path(array: xr.DataArray, output_path: str) -> None:
    """
    Write the array to `output_path` as a tiled, ZSTD-compressed raster.

    The dtype is taken from the array's encoding when present, otherwise from the
    array itself. Floating-point data is written with predictor 3, anything else
    with predictor 2.
    """
    dtype = array.encoding.get("dtype", str(array.dtype))
    is_float = np.issubdtype(dtype, np.floating)
    # The module-level profiles carry the same tiled/compress/zstd_level/predictor options
    creation_options = FLOAT_COMPRESSION_KWARGS if is_float else INT_COMPRESSION_KWARGS
    array.rio.to_raster(output_path, **creation_options)
def load_vis_dict(raster: Raster) -> Dict[str, Any]:
    """
    Load the visualization JSON of a raster and add colorbar ticks and labels.

    The "colormap" entry is re-keyed by position. For a CategoricalRaster the
    labels are the raster categories and each tick sits in the middle of its
    class section of the 0-255 range. Otherwise five evenly spaced ticks are
    used, labelled with values interpolated over `vis_dict["range"]` and rounded
    to one decimal place.
    """
    local_path = raster.visualization_asset.local_path
    with open(local_path) as f:
        vis_dict = json.load(f)
    vis_dict["colormap"] = dict(enumerate(vis_dict["colormap"]))
    if isinstance(raster, CategoricalRaster):
        vis_dict["labels"] = raster.categories
        # Position ticks in the middle of the class section: average each pair of
        # adjacent section edges (plain slicing instead of a hand-built strided view)
        edges = np.linspace(0, 255, len(raster.categories) + 1)
        vis_dict["ticks"] = (edges[:-1] + edges[1:]) / 2
    else:
        num_ticks = 5
        vis_dict["ticks"] = np.linspace(0, 255, num_ticks)
        vis_dict["labels"] = np.linspace(
            vis_dict["range"][0], vis_dict["range"][1], num_ticks
        ).round(1)
    return vis_dict
def compute_sobel_gradient(x: NDArray[Any]) -> NDArray[Any]:
    """Use a Sobel filter to compute the magnitude of the gradient in input

    Args:
        x (np.array): Input image (height, width). Extra dimensions of length 1
            are squeezed away. Integer and boolean inputs are promoted to float64
            before filtering.
    Returns:
        grad_mag (np.array): Gradient magnitude of input
    Raises:
        ValueError: If the input does not reduce to a two-dimensional array.
    """
    if len(x.shape) > 2:
        x = np.squeeze(x)

    if len(x.shape) != 2:
        raise ValueError(
            "Invalid NumPy array. Valid arrays have two dimensions or more dimensions of "
            "length 1. E.g. (100, 100) or (1, 100, 100) or (1, 1, 100, 100)"
        )

    # scipy.ndimage.sobel returns the input dtype, so integer images (e.g. uint8)
    # would wrap around in the filter output and again when squaring below.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(np.float64)

    grad_y: NDArray[Any] = cast(NDArray[Any], scipy.ndimage.sobel(x, axis=1))
    grad_x: NDArray[Any] = cast(NDArray[Any], scipy.ndimage.sobel(x, axis=0))

    return np.sqrt(grad_x**2 + grad_y**2)
def get_meta(
    in_path: str,
    width: int,
    height: int,
    transform: Affine,
    nodata: Optional[Union[int, float]] = None,
) -> Dict[str, Any]:
    """
    Build an output profile from the metadata of the raster at `in_path`.

    The source metadata is copied, `nodata` is overridden when one is given, and
    width, height, transform, BIGTIFF="IF_SAFER" and the compression options are
    set. The integer compression profile is used for integer dtypes and the float
    profile for everything else.
    """
    with rasterio.open(in_path) as src:
        profile = src.meta.copy()
        uses_int_profile = np.issubdtype(src.meta["dtype"], np.integer)

    if nodata is not None:
        profile["nodata"] = nodata

    if uses_int_profile:
        compression_kwargs = INT_COMPRESSION_KWARGS
    else:
        compression_kwargs = FLOAT_COMPRESSION_KWARGS

    overrides = {
        "width": width,
        "height": height,
        "transform": transform,
        "BIGTIFF": "IF_SAFER",
        **compression_kwargs,
    }
    profile.update(overrides)
    return profile
def compress_raster(
    src_path: str, dst_path: str, num_threads: Union[int, str] = "all_cpus", **kwargs: Any
) -> None:
    """Load a tif raster and save it in compressed format

    The source is copied block window by block window into a new file created with
    the source metadata plus the given creation options.

    Args:
        src_path: Path of the raster to read.
        dst_path: Path of the raster to write.
        num_threads: Passed as the `num_threads` option when opening the output.
        **kwargs: Extra options for opening the output (e.g. compression settings).
            Options that also appear in the source metadata override it.
    """
    with rasterio.open(src_path) as src:
        # Merge into one dict first: unpacking `**src.meta` and `**kwargs` side by
        # side in the call raises TypeError as soon as both contain the same key.
        profile = {**src.meta, **kwargs, "num_threads": num_threads}
        with rasterio.open(dst_path, "w", **profile) as dst:
            for _, win in src.block_windows():
                dst.write(src.read(window=win), window=win)
def parallel_stack_bands(
    raster_refs: Sequence[str],
    out_path: str,
    num_workers: int,
    block_size: Tuple[int, int],
    resampling: Resampling,
    timeout_s: float = 120.0,
    **kwargs: Any,
):
    """
    Stack bands by reading different band files and writing them into a single file.
    All bands are resampled to the output CRS and affine transform.

    Files are processed one after another in the order given. For each file, the
    blocks are read concurrently by a thread pool, while writing to the output is
    done by the calling thread as each read completes.

    Arguments:
        raster_refs: sequence of references for the files containing band data
        out_path: output filepath
        num_workers: number of threads used to read data
        block_size: size of the block (width, height) that is read by each thread
        resampling: rasterio resampling method used to resample band data
        timeout_s: time budget in seconds for collecting all block reads of one
            input file (it is the `timeout` of a single `as_completed` call per
            file, so it is not a per-block limit) (default: 120)
        **kwargs: other keyword arguments will be used to create the output raster.
            Should include things like driver, height, width, transform, crs.
            `crs`, `width`, `height` and `transform` are required, as they are also
            used to warp every input.

    Raises:
        TimeoutError: if the blocks of an input file are not all collected within
            `timeout_s`
    """

    def read_block(raster_url: str, win: Window):
        # Each call opens its own dataset handle and warps it to the output grid,
        # then reads only the requested window.
        LOGGER.debug(f"Reading block {win} from {raster_url}")
        with rasterio.open(raster_url) as src:
            with WarpedVRT(
                src,
                crs=kwargs["crs"],
                width=kwargs["width"],
                height=kwargs["height"],
                transform=kwargs["transform"],
                resampling=resampling,
            ) as vrt:
                win_data = vrt.read(window=win)
        LOGGER.debug(f"Done reading block {win} from {raster_url}")
        return win_data, win

    def write_bands(raster_ref: str, wins: List[Window], band_idx: List[int], dst: DatasetWriter):
        with ThreadPoolExecutor(max_workers=num_workers) as pool:
            futures = [pool.submit(read_block, raster_ref, win) for win in wins]
            # Blocks are written in completion order, not submission order
            for future in as_completed(futures, timeout=timeout_s):
                try:
                    ar, w = future.result()
                    LOGGER.debug(f"Writing block {w}, bands {band_idx}, to {out_path}")
                    dst.write(ar, band_idx, window=w)
                    LOGGER.debug(f"Done writing block {w}, bands {band_idx}, to {out_path}")
                except Exception as e:
                    LOGGER.exception(f"Exception while processing block from {raster_ref}: {e}")
                    raise e

    wins = [w for w in get_windows(kwargs["width"], kwargs["height"], *block_size)]
    with rasterio.open(out_path, "w", **kwargs, num_threads="all_cpus") as dst:
        # Output band indices are 1-based; `offset` is the first free output band
        offset = 1
        for raster_ref in raster_refs:
            with rasterio.open(raster_ref) as src:
                band_idx = [i + offset for i in range(src.count)]
            try:
                write_bands(raster_ref, wins, band_idx, dst)
                offset = band_idx[-1] + 1
            except TimeoutError:
                msg = f"Timeout while reading raster data from {raster_ref}"
                LOGGER.exception(msg)
                raise TimeoutError(msg)
def write_to_raster(data: NDArray[Any], tr: Affine, raster_path: str, raster_crs: CRS) -> AssetVibe:
    """
    Write a 2D array as a single-band float32 GeoTIFF and wrap it in an asset.

    The raster is created at `raster_path` with transform `tr` and CRS
    `raster_crs`; its width and height come from the array shape. The returned
    AssetVibe references the written file, with type "image/tiff" and a new guid.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]
    profile = {
        "driver": "GTiff",
        "transform": tr,
        "dtype": rasterio.float32,
        "count": 1,
        "width": n_cols,
        "height": n_rows,
        "crs": raster_crs,
    }
    with rasterio.open(raster_path, "w", **profile) as dst:
        dst.write(data, indexes=1)
    return AssetVibe(reference=raster_path, type="image/tiff", id=gen_guid())
def is_valid_prompt(prompt: List[Prompt], n_original_fg_pnts: int) -> bool:
    """Tell whether the prompts that fall inside a chip can be used for SAM inference.

    A chip prompt is accepted when either:
    - it contains at least one foreground point (label 1), with or without a bbox; or
    - it contains a bbox and the original prompt group/id had no foreground points.

    Args:
        prompt: Prompts inside the chip, as (coordinates, label) pairs. Two
            coordinates denote a point, four denote a bounding box.
        n_original_fg_pnts: Number of foreground points in the original prompt group/id.

    Returns:
        True if prompt is valid, False otherwise.
    """
    if not prompt:
        return False

    has_foreground_point = any(p[1] == 1 for p in prompt if len(p[0]) == 2)
    if has_foreground_point:
        return True

    has_bbox = any(len(p[0]) == 4 for p in prompt)
    return has_bbox and n_original_fg_pnts == 0
" + f"Columns found: {prompt_df.columns}" + ) + + if not prompt_df.geometry.apply(lambda g: isinstance(g, (shpg.Point, shpg.Polygon))).all(): + prompt_types = list( + set( + [ + type(g) + for g in prompt_df.geometry + if not (isinstance(g, (shpg.Point, shpg.Polygon))) + ] + ) + ) + raise ValueError( + f"Expected each geometry to be a shapely Point or Polygon. Found: {prompt_types}" + ) + + prompts_within_roi = prompt_df.geometry.within(roi) + if not prompts_within_roi.all(): + prompts_outside_roi = prompt_df.geometry[~prompts_within_roi] + coords = [ + (p.x, p.y) if isinstance(p, shpg.Point) else p.bounds for p in prompts_outside_roi + ] + raise ValueError( + "Expected all prompts to be contained within the ROI of input_geometry. Prompts " + f"outside of ROI: {coords}" + ) + + if not prompt_df.prompt_id.apply(lambda i: isinstance(i, (int, str))).all(): + prompts = [i for i in prompt_df.prompt_id if not isinstance(i, (int, str))] + raise ValueError(f"Expected prompt_ids as integers or strings. Found: {prompts}") + + if not prompt_df.label.apply(lambda i: isinstance(i, int) and i in (0, 1)).all(): + raise ValueError( + "Expected labels to be integers, with 0 or 1 values. " + f"Found: {[i for i in prompt_df.label if not isinstance(i, int) or i not in (0, 1)]}" + ) + + for prompt_id, group in prompt_df.groupby("prompt_id"): + nbbox = sum([isinstance(g, shpg.Polygon) for g in group.geometry]) + if nbbox > 1: + raise ValueError( + "Expected at most one bounding box per prompt. " + f"Found {nbbox} for prompt_id '{prompt_id}'" + ) + + +def adjust_bounding_box(prompts: List[Prompt]) -> List[Prompt]: + """Adjust bounding box coordinates to contain all foreground points in the prompt + + Args: + prompts: List of prompts. + + Returns: + Adjusted list of prompts. 
def convert_coords_to_pixel_position(
    geometry: Union[shpg.Point, shpg.Polygon], transform: Affine
) -> Union[Point, BBox]:
    """Map a point or bounding-box geometry to pixel coordinates.

    The inverse of `transform` is applied to the point, or to the two corners
    given by the polygon bounds. For a polygon the result is ordered as
    (xmin, ymin, xmax, ymax), as expected by SAM.

    Args:
        geometry: Point or Polygon geometry.
        transform: Affine transformation matrix.

    Returns:
        Coordinates in pixel position.

    Raises:
        ValueError: If geometry is not a Point or Polygon.
    """
    if isinstance(geometry, shpg.Point):
        return ~transform * (geometry.x, geometry.y)  # type: ignore

    if isinstance(geometry, shpg.Polygon):
        bounds = geometry.bounds
        first_corner = ~transform * bounds[:2]  # type: ignore
        second_corner = ~transform * bounds[2:]  # type: ignore
        # Sorting each axis restores min/max order whichever corner maps lower
        xmin, xmax = sorted((first_corner[0], second_corner[0]))
        ymin, ymax = sorted((first_corner[1], second_corner[1]))
        return (xmin, ymin, xmax, ymax)

    raise ValueError(f"Invalid prompt geometry: {geometry}")
def get_normalized_prompts_within_chip(
    prompts: Dict[int, List[Prompt]], read_window: Window, geometry_offset: Dims
) -> Dict[int, List[Prompt]]:
    """Filter and normalize prompts within chip.

    Output prompts will include only prompts within the chip with normalized coordinates relative
    to the chip read window. Points outside the window are dropped, bounding boxes
    that intersect it are clipped to it, and a prompt group is kept only if what
    remains passes `is_valid_prompt`.

    Args:
        prompts: Dictionary of prompts.
        read_window: Chip read window.
        geometry_offset: Chip geometry offset.
    Returns:
        Dictionary of normalized prompts.
    Raises:
        ValueError: If a prompt entry has neither two (point) nor four (bounding
            box) coordinates.
    """
    col_min = read_window.col_off - geometry_offset.width
    col_max = col_min + read_window.width

    row_min = read_window.row_off - geometry_offset.height
    row_max = row_min + read_window.height

    normalized_prompts = {}
    for prompt_id, prompt in prompts.items():
        new_prompt = []
        # Sums the labels of every point in the group, including points outside the chip
        n_foreground_points = 0
        for coords, lb in prompt:
            if len(coords) == 2:  # Point
                n_foreground_points += lb
                x, y = cast(Point, coords)
                if (col_min <= x <= col_max) and (row_min <= y <= row_max):
                    new_prompt.append(((x - col_min, y - row_min), lb))
            elif len(coords) == 4:  # Bounding box
                xmin, ymin, xmax, ymax = cast(BBox, coords)
                if xmin < col_max and xmax > col_min and ymin < row_max and ymax > row_min:
                    # Clip to the window, then shift to window-relative coordinates
                    xmin = max(xmin, col_min) - col_min
                    ymin = max(ymin, row_min) - row_min
                    xmax = min(xmax, col_max) - col_min
                    ymax = min(ymax, row_max) - row_min
                    new_prompt.append(((xmin, ymin, xmax, ymax), lb))
            else:
                raise ValueError(
                    "Invalid prompt format. Expected either a point or a bounding box. "
                    f"Got the following prompt instead: {prompt}"
                )

        if is_valid_prompt(new_prompt, n_foreground_points):
            normalized_prompts[prompt_id] = new_prompt

    return normalized_prompts
def generate_crop_boxes(
    chip_size: int, n_layers: int, overlap_ratio: float = 0.0
) -> Tuple[List[BBox], List[int]]:
    """Generate a list of crop boxes of different sizes.

    The first box is the whole chip (layer index 0). Each additional layer i
    (starting at 1) splits the chip into (2**i)**2 boxes.

    Args:
        chip_size: Size of the chip.
        n_layers: Number of layers.
        overlap_ratio: Overlap ratio between crops.
    Returns:
        Tuple of crop boxes, each as (x0, y0, x1, y1), and associated layer indices.
    """
    crop_boxes, layer_idxs = [], []

    # Original chip; a tuple like every other box
    crop_boxes.append((0, 0, chip_size, chip_size))
    layer_idxs.append(0)

    def crop_len(orig_len: int, n_crops: int, overlap: int) -> int:
        return int(ceil((overlap * (n_crops - 1) + orig_len) / n_crops))

    for i_layer in range(n_layers):
        n_crops_per_side = 2 ** (i_layer + 1)
        overlap = int(overlap_ratio * chip_size * (2 / n_crops_per_side))

        crop_w = crop_len(chip_size, n_crops_per_side, overlap)
        crop_h = crop_len(chip_size, n_crops_per_side, overlap)

        crop_box_x0 = [int((crop_w - overlap) * i) for i in range(n_crops_per_side)]
        crop_box_y0 = [int((crop_h - overlap) * i) for i in range(n_crops_per_side)]

        # Crops in (x0, y0, x1, y1) format, clipped to the chip
        for x0, y0 in product(crop_box_x0, crop_box_y0):
            box = (x0, y0, min(x0 + crop_w, chip_size), min(y0 + crop_h, chip_size))
            crop_boxes.append(box)
            layer_idxs.append(i_layer + 1)

    return crop_boxes, layer_idxs
+ """ + bbox = [] + for m in np.squeeze(mask, axis=1): + rows = np.any(m, axis=1) + cols = np.any(m, axis=0) + rmin, rmax = np.where(rows)[0][[0, -1]] + cmin, cmax = np.where(cols)[0][[0, -1]] + bbox.append([cmin, rmin, cmax, rmax]) + return np.array(bbox, dtype=np.float32) + + +def translate_bbox(mask_bbox: NDArray[Any], x_offset: float, y_offset: float) -> NDArray[Any]: + """Translate a mask bounding box by an offset. + + Args: + mask_bbox: Mask bounding box. + x_offset: X offset. + y_offset: Y offset. + Returns: + Translated bounding box. + """ + offset = [[x_offset, y_offset, x_offset, y_offset]] + return mask_bbox + offset + + +def uncrop_masks( + mask: NDArray[Any], mask_bbox: NDArray[Any], crop_box: BBox, chip_size: int +) -> Tuple[NDArray[Any], NDArray[Any]]: + """Translate and scale a mask from a crop to the original chip size. + + Args: + mask: Binary mask. + mask_bbox: Bounding box of the mask. + crop_box: Crop box. + chip_size: Chip size. + Returns: + Tuple of translated mask and bounding box numpy arrays. + """ + x0, y0, x1, y1 = map(int, crop_box) + crop_width = x1 - x0 + crop_height = y1 - y0 + resized_mask = cast( + torch.Tensor, + resize(torch.from_numpy(mask), size=[crop_height, crop_width]), + ) + pad_x, pad_y = chip_size - crop_width, chip_size - crop_height + pad = (x0, pad_x - x0, y0, pad_y - y0) + + mask = torch.nn.functional.pad(resized_mask, pad, value=0).numpy() + + scale_x, scale_y = crop_width / chip_size, crop_height / chip_size + mask_bbox = mask_bbox.astype(np.float64) * np.array([scale_y, scale_x, scale_y, scale_x]) + return mask, np.round(mask_bbox).astype(np.float32) + + +def calculate_stability_score( + masks: NDArray[Any], mask_threshold: float, threshold_offset: float +) -> NDArray[Any]: + """Compute the stability score for a batch of masks. + + The stability score is the IoU between the binary masks obtained by thresholding + the predicted mask logits at high and low values. + + Args: + masks: Mask logits. 
def build_chip_preprocessing_operation(
    raster: Raster,
    band_names: Optional[List[str]],
    band_scaling: Optional[List[float]],
    band_offset: Optional[List[float]],
) -> Callable[[NDArray[Any]], NDArray[Any]]:
    """Build the function that turns a raster chip into a 3-channel float32 chip.

    Args:
        raster: Raster providing the band name -> index mapping and the default
            scale/offset values.
        band_names: One or three band names. A single name is replicated three
            times. Defaults to ['R', 'G', 'B'] when empty or None.
        band_scaling: One or three multiplicative factors. Defaults to the raster scale.
        band_offset: One or three additive terms. Defaults to the raster offset.
    Returns:
        Callable that selects the bands, applies ``chip * scale + offset`` and
        multiplies by 255 when the result lies within [0, 1].
    Raises:
        ValueError: If the number of names/parameters is not one or three, or
            if a band is not available in the raster.
    """
    # Resolve the three bands that will be fed to the model
    if not band_names:
        LOGGER.info("No bands selected. Using ['R', 'G', 'B']")
        band_names = ["R", "G", "B"]
    elif len(band_names) == 1:
        LOGGER.info(
            "Got only a single band name. "
            "Will replicate it to build a 3-channeled chip for SAM."
        )
        band_names = band_names * 3
    elif len(band_names) != 3:
        raise ValueError(
            f"Invalid number of bands. Expected one or three band names. Got {band_names}"
        )

    missing_bands = [name for name in band_names if name not in raster.bands]
    if missing_bands:
        raise ValueError(
            f"Band not found in input raster. Expected band names {band_names} "
            f"to be among raster bands {list(raster.bands.keys())}"
        )
    band_idx = [raster.bands[name] for name in band_names]

    # Per-band multiplicative factor
    if not band_scaling:
        band_scaling = [raster.scale] * 3
    elif len(band_scaling) == 1:
        LOGGER.info("Got a single scaling parameter. Will use it for all bands.")
        band_scaling = band_scaling * 3
    elif len(band_scaling) != len(band_names):
        raise ValueError(f"Expected one or three scaling parameters. Got {band_scaling}")
    scale = np.array(band_scaling).reshape(1, 3, 1, 1)

    # Per-band additive term
    if not band_offset:
        band_offset = [raster.offset] * 3
    elif len(band_offset) == 1:
        LOGGER.info("Got a single offset parameter. Will use it for all bands.")
        band_offset = band_offset * 3
    elif len(band_offset) != len(band_names):
        raise ValueError(f"Expected one or three offset parameters. Got {band_offset}")
    offset = np.array(band_offset).reshape(1, 3, 1, 1)

    def preprocessing_operation(chip: NDArray[Any]) -> NDArray[Any]:
        selected_bands = chip[:, band_idx, :, :]
        normalized_chip = selected_bands * scale + offset
        within_unit_range = np.min(normalized_chip) >= 0 and np.max(normalized_chip) <= 1
        if within_unit_range:
            # Bring [0, 1] data to the [0, 255] range
            normalized_chip = normalized_chip * 255.0
        return normalized_chip.astype(np.float32)

    return preprocessing_operation
def mask_encoder_preprocess(
    input_mask: Optional[NDArray[Any]] = None,
) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
    """Preprocess the input mask for the encoder model.

    Args:
        input_mask: Input mask, or None when no mask prompt is available.
    Returns:
        Tuple of preprocessed mask and has_mask inputs. Without an input mask, an
        all-zero (1, 1, 256, 256) mask and a zero has_mask flag are returned.
    """
    # Compare against None explicitly: the truth value of a multi-element
    # numpy array is ambiguous and raises ValueError.
    if input_mask is None:
        onnx_mask_input = np.zeros((1, 1, 256, 256), dtype=np.float32)
        onnx_has_mask_input = np.zeros(1, dtype=np.float32)
        return onnx_mask_input, onnx_has_mask_input

    # TODO: Implement mask preprocessing if passed as argument
    # input_mask = ...
    return input_mask, np.ones(1, dtype=np.float32)
def write_shapefile(
    data: NDArray[Any],
    input_crs: CRS,
    tr: Affine,
    mask1: NDArray[Any],
    path: str,
    simplify: str,
    tolerance: float,
    file_name: str,
    output_crs: int = 4326,
) -> AssetVibe:
    """Vectorize each cluster of a segmentation raster and archive the shapefiles.

    One shapefile named ``{file_name}{segment}.shp`` is written to `path` for
    every unique value in `data`, and all files in `path` are then archived.

    Args:
        data: Raster with one integer label per pixel.
        input_crs: CRS of the raster.
        tr: Affine transform of the raster.
        mask1: Mask multiplied into `data` before vectorizing.
        path: Directory where the shapefiles are written.
        simplify: Simplification mode, compared against `SimplifyBy` members.
        tolerance: Tolerance used when `simplify` is `SimplifyBy.simplify`.
        file_name: Prefix of the shapefile names.
        output_crs: CRS of the output when `simplify` is `SimplifyBy.simplify`.
    Returns:
        Zip asset containing the output files.
    """
    clusters = np.unique(data)
    data1 = data * mask1.astype(np.uint16)
    # The uint16 view of the raster does not change between clusters, so it is
    # computed once instead of copying the whole raster on every iteration.
    data1_uint16 = data1.astype(np.uint16)

    for segment in clusters:
        cluster = data1 == segment
        df_shapes = gpd.GeoSeries(
            [shpg.shape(s) for s, _ in shapes(data1_uint16, mask=cluster, transform=tr)],
            crs=input_crs,
        )  # type: ignore
        cluster_path = os.path.join(path, f"{file_name}{segment}.shp")

        # NOTE(review): only the `simplify` branch reprojects to `output_crs`;
        # the other two write geometries in `input_crs`. Confirm this is intended.
        if simplify == SimplifyBy.simplify:
            df_shapes.simplify(tolerance).to_crs(output_crs).to_file(cluster_path)
        elif simplify == SimplifyBy.convex:
            df_shapes.convex_hull.to_file(cluster_path)
        else:
            df_shapes.to_file(cluster_path)

    # Create zip archive containing all output
    archive_path = create_flat_archive(path, "result")
    return AssetVibe(reference=archive_path, type="application/zip", id=gen_guid())
def affine_all_close(tr1: Affine, tr2: Affine, rel_tol: float = EPS) -> bool:
    """Check whether all coefficients of two transforms are approximately equal.

    Coefficients are compared pairwise through their difference relative to the
    sum of their magnitudes (plus EPS, so that two zeros compare as equal).

    Args:
        tr1: First transform.
        tr2: Second transform.
        rel_tol: Maximum relative difference accepted for each coefficient.
    Returns:
        True if every pair of coefficients is within the tolerance.
    """
    # Magnitudes are used in the denominator so that negative coefficients
    # cannot cancel out against each other or against EPS (which would divide
    # by zero).
    return all(abs(a - b) / (abs(a) + abs(b) + EPS) < rel_tol for a, b in zip(tr1, tr2))
+ ds_shape = (src.height // self.downsampling, src.width // self.downsampling) + raster_data = src.read(out_shape=ds_shape) + self.rasters[raster.id] = { + "data": raster_data, + "meta": src.meta, + } + self.logger.debug( + f"Loaded raster id={raster.id} into memory as array of shape " + f"{raster_data.shape} and dtype {raster_data.dtype}" + ) + + def _adjust_window(self, window: Window): + """Adjust window to downsampled raster""" + win = Window(*(i // self.downsampling for i in window.flatten())) + return win + + def _read_data_from_cache(self, raster: Raster, window: Window): + if raster.id not in self.rasters: + self._cache_raster(raster) + # Adjust window to downsampled raster + win = self._adjust_window(window) + i, j = win.toslices() + raster_cache = self.rasters[raster.id] + x = raster_cache["data"][:, i, j] + return x.astype(np.float32), x == raster_cache["meta"]["nodata"] + + def __call__(self, raster: Raster, window: Window, out_shape: Tuple[int, int]): + win_data, win_mask = self._read_data_from_cache(raster, window) + if win_data.shape[1:] != out_shape: + raise ValueError( + f"Requested output shape {out_shape}, got {win_data.shape[1:]} " + f"for downsampling {self.downsampling}" + ) + return win_data, win_mask + + +class ChipDataset(Dataset[ChipDataType]): + """ + Pytorch dataset that load chips of data for model inference. + + This dataset can be used with a pytorch DataLoader to load data as needed and + avoid loading the whole raster into memory. Will optionally downsample the + input to reduce computation requirements. 
def __init__(
    self,
    rasters: List[T],
    chip_size: Dims,
    step_size: Dims,
    downsampling: int = 1,
    nodata: Optional[float] = None,
    geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None,
    reader: Optional[
        Callable[[T, Window, Tuple[int, int]], Tuple[NDArray[Any], NDArray[Any]]]
    ] = None,
    dtype: str = "float32",
):
    """Compute the read/write windows and output metadata for the chips.

    Args:
        rasters: Rasters to read chips from. Metadata is read from the first one.
        chip_size: (width, height, time) of the chips fed to the model.
        step_size: (width, height, time) step between consecutive chips.
        downsampling: Factor between the size of the region read from the
            raster and the size of the chip.
        nodata: Nodata value. Defaults to the nodata value of the first raster.
        geometry_or_chunk: Optional geometry or raster chunk restricting the
            region that is read.
        reader: Callable that reads a window of a raster into an array with
            the requested shape. Defaults to reading with rasterio.
        dtype: Data type stored in the output metadata.
    """
    self.rasters = rasters
    self.chip_size = Dims(*chip_size)
    self.step_size = Dims(*step_size)
    self.downsampling = downsampling
    # Use the normalized Dims (not the raw arguments) so that plain
    # (width, height, time) tuples are accepted as well.
    self.read_chip = Dims(
        self.chip_size.width * downsampling,
        self.chip_size.height * downsampling,
        self.chip_size.time,
    )
    self.read_step = Dims(
        self.step_size.width * downsampling,
        self.step_size.height * downsampling,
        self.step_size.time,
    )
    self.reader = reader if reader is not None else self._default_reader

    self._read_meta(rasters[0].raster_asset.url, geometry_or_chunk, nodata)

    # Output rasters are written at the downsampled resolution
    self.out_width = self.width // self.downsampling
    self.out_height = self.height // self.downsampling
    self.out_transform = self.transform * Affine.scale(self.downsampling, self.downsampling)

    self.read_windows = get_read_windows(
        self.width, self.height, len(self.rasters), self.read_chip, self.read_step, self.offset
    )
    self.write_windows, self.chip_slices = get_write_windows(
        self.out_width, self.out_height, len(self.rasters), self.chip_size, self.step_size
    )

    self.meta = {
        "driver": "GTiff",
        "height": self.out_height,
        "width": self.out_width,
        "crs": self.crs,
        "dtype": dtype,
        "transform": self.out_transform,
        "nodata": self.nodata,
    }
// 2 + diff_h = height - window.height + dh = diff_h // 2 + + hs, ws = window.toranges() + min_w = max(ws[0] - dw, 0) + max_w = min(ws[1] + diff_w - dw, self.raster_width) + min_h = max(hs[0] - dh, 0) + max_h = min(hs[1] + diff_h - dh, self.raster_height) + + new_win = Window.from_slices((min_h, max_h), (min_w, max_w)) + LOGGER.info(f"Adjusting from {window} to {new_win}") + return new_win + + def __len__(self): + return len(self.read_windows) + + def _read_meta( + self, + url: str, + geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None, + nodata: Optional[float] = None, + ): + with rasterio.open(url) as src: + self.crs = src.crs + self.raster_width: int = src.width + self.raster_height: int = src.height + self.nodata = src.nodata if nodata is None else nodata + if geometry_or_chunk and isinstance(geometry_or_chunk, BaseGeometry): + # Compute envelope in native CRS to avoid nodata + box = cast( + shpg.Polygon, + gpd.GeoSeries(geometry_or_chunk, crs="epsg:4326") + .to_crs(self.crs) + .iloc[0] + .envelope, + ) + window = cast( + Window, raster_geometry_mask(src, [box], all_touched=True, crop=True)[2] + ) + # Adjust window to make sure it is not too small + window = self._adjust_roi_window(window) + # Compute the transform with the adjusted window + self.transform: Affine = window_transform(window, src.transform) + self.roi_window = window + self.width: int = window.width + self.height: int = window.height + self.offset = Dims(window.col_off, window.row_off, 0) + elif geometry_or_chunk and isinstance(geometry_or_chunk, RasterChunk): + col_off, row_off, width, height = geometry_or_chunk.limits + self.transform: Affine = src.transform + self.width: int = width + self.height: int = height + self.offset = Dims(col_off, row_off, 0) + self.roi_window = Window(*geometry_or_chunk.limits) # type:ignore + box = window_bounds(self.roi_window, self.transform) + else: + box = shpg.box(*src.bounds) + self.transform: Affine = src.transform + self.width: int = 
class StackOnChannelsChipDataset(ChipDataset):
    """Chip dataset that reads the same chip from several raster lists.

    One `ChipDataset` is created per raster list and the chips they return are
    concatenated along the first axis. All datasets must share width, height
    and CRS, and have approximately the same transform.
    """

    def __init__(
        self,
        rasters: List[List[T]],
        chip_size: Dims,
        step_size: Dims,
        downsampling: int = 1,
        nodata: Optional[float] = None,
        geometry_or_chunk: Optional[Union[BaseGeometry, RasterChunk]] = None,
        reader: Optional[
            Callable[[T, Window, Tuple[int, int]], Tuple[NDArray[Any], NDArray[Any]]]
        ] = None,
    ):
        shared_args = (chip_size, step_size, downsampling, nodata, geometry_or_chunk, reader)
        # The first raster list defines the reference metadata
        super().__init__(rasters[0], *shared_args)
        self.datasets = [ChipDataset(raster_list, *shared_args) for raster_list in rasters]
        for attr in ("width", "height", "crs", "transform"):
            for dataset in self.datasets:
                ref_attr = getattr(self, attr)
                comp_attr = getattr(dataset, attr)
                if attr == "transform":
                    # Transforms are floating point, compare with a tolerance
                    mismatch = not affine_all_close(ref_attr, comp_attr)
                else:
                    mismatch = ref_attr != comp_attr
                if mismatch:
                    raise ValueError(
                        f"Expected '{attr}' to be the same for all datasets, found "
                        f"{ref_attr} != {comp_attr}"
                    )

    def __getitem__(self, idx: int) -> ChipDataType:
        # Transpose the per-dataset samples:
        # (data, mask, info), (data, mask, info) -> (data, data), (mask, mask), (info, info)
        samples = (dataset[idx] for dataset in self.datasets)
        chip_data, chip_mask, chip_info = zip(*samples)
        chip_data = cast(List[NDArray[Any]], chip_data)
        chip_mask = cast(List[NDArray[Any]], chip_mask)
        chip_info = cast(List[Dict[str, str]], chip_info)
        reference_info = chip_info[0]
        assert all(
            reference_info[key] == info[key]
            for info in chip_info
            for key in ("write_window", "write_times", "chip_slices")
        )

        def with_leading_axis(array: NDArray[Any]) -> NDArray[Any]:
            # 2D chips get a leading axis so that they can be concatenated
            return array[None] if array.ndim == 2 else array

        stacked_data = np.concatenate([with_leading_axis(c) for c in chip_data])
        stacked_mask = np.concatenate([with_leading_axis(c) for c in chip_mask])
        return stacked_data, stacked_mask, reference_info
def collate_data(
    data: Union[List[NDArray[Any]], Dict[Any, NDArray[Any]], NDArray[Any]],
) -> Union[Dict[Any, NDArray[Any]], NDArray[Any]]:
    """Join chip data into a batch.

    Args:
        data: A numpy array (returned as is), a non-empty list/tuple of numpy
            arrays (stacked along a new first axis), or a dictionary whose
            values are collated recursively.
    Returns:
        The collated array, or a dictionary of collated arrays.
    Raises:
        ValueError: If `data` is an empty sequence or has an unsupported type.
    """
    if isinstance(data, dict):
        return {k: collate_data(v) for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        if not data:
            # Fail with the same error type as the other invalid inputs instead
            # of an IndexError when peeking at the first element.
            raise ValueError("Cannot collate an empty sequence.")
        if isinstance(data[0], np.ndarray):
            return np.stack(data)
    if isinstance(data, np.ndarray):
        return data

    raise ValueError(f"Invalid type {type(data)} for collate function.")
def write_prediction_to_file(
    chip_data: NDArray[Any],
    chip_mask: NDArray[Any],
    write_info_list: List[Dict[str, Any]],
    out_dir: str,
    filepaths: List[str],
    get_filename: Callable[[int], str],
):
    """Write a batch of chip predictions to their per-timestep output files.

    Args:
        chip_data: Batch of predictions.
        chip_mask: Batch of masks associated with the predictions.
        write_info_list: Write information (windows, slices, and file metadata)
            for each sample in the batch.
        out_dir: Directory where the files are written.
        filepaths: List of output paths. Paths that are not yet in the list are
            appended to it.
        get_filename: Function that maps a timestep index to a file name.
    """
    for prediction, nodata_mask, info in zip(chip_data, chip_mask, write_info_list):
        # Create singleton time dimension if necessary
        if prediction.ndim == 3:
            prediction = prediction[None]
        if nodata_mask.ndim == 3:
            nodata_mask = nodata_mask[None]
        time_interval, row_interval, col_interval = info["chip_slices"]
        out_timesteps = range(*info["write_times"])
        chip_timesteps = range(*time_interval)
        for out_t, in_t in zip(out_timesteps, chip_timesteps):
            filepath = os.path.join(out_dir, get_filename(out_t))
            if filepath not in filepaths:
                filepaths.append(filepath)
            row_slice = slice(*row_interval)
            col_slice = slice(*col_interval)
            write_window_to_file(
                prediction[in_t, :, row_slice, col_slice],
                nodata_mask[in_t, :, row_slice, col_slice].any(axis=0),
                info["write_window"],
                filepath,
                info["meta"],
            )
def get_read_intervals(
    dim_size: int, chip_size: int, step: int, offset: int
) -> Tuple[NDArrayInt, NDArrayInt]:
    """
    Divide total dim size in intervals by using an approximate step
    Actual step is computed by rounding the step so that the number of windows
    is the rounded number of windows with the desired step

    Args:
        dim_size: Size of the dimension to be covered.
        chip_size: Size of each interval.
        step: Desired step between the start of consecutive intervals.
        offset: Value added to the start and end of every interval.
    Returns:
        Tuple with the start and the (exclusive) end of each interval.
    Raises:
        ValueError: If `dim_size` is smaller than `chip_size`, or if `step` is
            not positive when more than one interval is required.
    """
    if dim_size < chip_size:
        raise ValueError(
            f"{dim_size=} cannot be smaller than {chip_size=}. "
            "Please consider reducing the step/chip size or increasing the input geometry."
        )

    if dim_size == chip_size:
        # A single chip covers the whole dimension, the step is irrelevant
        # (and may legitimately be zero, e.g., for a small chip with overlap)
        num_blocks = 1
    elif step <= 0:
        raise ValueError(
            f"{step=} must be positive to cover {dim_size=} with {chip_size=}. "
            "Please consider reducing the overlap or increasing the chip size."
        )
    else:
        # Effects of using round versus ceil for determining step size:
        # With round:
        # This number of blocks should have the step be at most 1.5x the original step
        # Which should only happen when the chip size is quite big compared to the dimension size
        # With ceil: step size should be at most the chosen step
        num_blocks = int(np.ceil((dim_size - chip_size) / step)) + 1
        # Make sure we capture the whole area if dim_size is barely larger
        num_blocks = max(num_blocks, 2)
    start = np.round(np.linspace(0, dim_size - chip_size, num_blocks)).astype(int)
    end = np.clip(start + chip_size, 0, dim_size)
    assert end[-1] == dim_size, f"{end[-1]=} != {dim_size}"
    return start + offset, end + offset
def get_write_windows(
    width: int, height: int, time_length: int, chip_size: Dims, step: Dims
) -> Tuple[List[Tuple[Window, Interval]], List[Tuple[Interval, Interval, Interval]]]:
    """
    Generate write windows for a tensor with width, height, and time_length.
    The windows are generated according to chip_size and step (for all three dimensions).

    Returns a list with the (window, time interval) where each chip is written,
    and a list with the (time, rows, cols) intervals of each chip that are written.
    Both lists iterate over time first, then rows, then columns.
    """
    col_write, col_chip = get_write_intervals(width, chip_size.width, step.width, 0)
    row_write, row_chip = get_write_intervals(height, chip_size.height, step.height, 0)
    time_write, time_chip = get_write_intervals(time_length, chip_size.time, step.time, 0)

    # Pair up the start and end of each interval
    col_write_intervals = list(zip(*col_write))
    row_write_intervals = list(zip(*row_write))
    time_write_intervals = list(zip(*time_write))
    col_chip_intervals = list(zip(*col_chip))
    row_chip_intervals = list(zip(*row_chip))
    time_chip_intervals = list(zip(*time_chip))

    write_windows = []
    for time in time_write_intervals:
        for rows in row_write_intervals:
            for cols in col_write_intervals:
                write_windows.append((Window.from_slices(rows, cols), time))

    chip_slices = []
    for chip_time in time_chip_intervals:
        for chip_rows in row_chip_intervals:
            for chip_cols in col_chip_intervals:
                chip_slices.append((chip_time, chip_rows, chip_cols))

    return write_windows, chip_slices
+ """ + + def __init__( + self, + s1_items: Optional[Sentinel1RasterTileSequence], + s2_items: Sentinel2RasterTileSequence, + cloud_masks: Sentinel2CloudMaskTileSequence, + time_range: Tuple[datetime, datetime], + geometry: BaseGeometry, + chip_size: Dims, + overlap: Tuple[float, float, float], + s2_bands: List[int], + min_clear_ratio: float, + normalize_illuminance: bool, + ): + self.s1_items = s1_items + self.s2_items = s2_items + self.cloud_masks = cloud_masks + ref_item = s2_items.assets[0] + self.time_range = time_range + self.geometry = geometry + self.chip_size = chip_size + self.min_clear_ratio = min_clear_ratio + if any((o < 0) or (o >= 1) for o in overlap): + raise ValueError(f"Overlap values must be in range [0, 1), found {overlap}") + self.overlap = overlap + self.step = Dims(*(int(s * (1 - o)) for s, o in zip(chip_size, overlap))) + self.s2_bands = s2_bands + self.normalize_illuminance = normalize_illuminance + self.time_length = (self.time_range[1] - self.time_range[0]).days + 1 + if self.time_length != self.chip_size.time: + raise ValueError( + f"Expected time length = {self.time_length} to be the same as " + f"chip size = {self.chip_size.time}" + ) + self.write_range = s2_items.write_time_range + self.write_indices = ( + (self.write_range[0] - self.time_range[0]).days, + (self.write_range[1] - self.time_range[0]).days + 1, + ) + + with rasterio.open(ref_item.url) as src: + # Assuming all products are from the same tile for now + self.crs = src.crs + self.raster_width: int = src.width + self.raster_height: int = src.height + # Compute envelope in native CRS to avoid nodata + box = gpd.GeoSeries(geometry, crs="epsg:4326").to_crs(self.crs).iloc[0].envelope + window = cast(Window, raster_geometry_mask(src, [box], all_touched=True, crop=True)[2]) + # Adjust window to make sure it is not too small + window = self._adjust_roi_window(window) + # Compute the transform with the adjusted window + self.transform: Affine = window_transform(window, src.transform) 
+ self.width: int = window.width + self.height: int = window.height + self.roi = box + self.offset = Dims(window.col_off, window.row_off, 0) + self.roi_window = window + read_windows = get_read_windows( + self.width, self.height, self.time_length, self.chip_size, self.step, self.offset + ) + write_windows, chip_slices = get_write_windows( + self.width, self.height, self.time_length, self.chip_size, self.step + ) + assert all(i == write_windows[0][1] for _, i in write_windows) + assert all(i == chip_slices[0][0] for i, _, _ in chip_slices) + # Overwrite time indices by what we get from the input sequence + write_windows = [(w, self.write_indices) for w, _ in write_windows] + chip_slices = [(self.write_indices, h, w) for _, h, w in chip_slices] + + assert len(read_windows) == len(write_windows) == len(chip_slices) + self.s1_indices = self._get_indices(self.s1_items) if self.s1_items is not None else None + self.s2_indices = self._get_s2_indices(self.s2_items, self.cloud_masks) + + # Filter out windows without any cloud-free data + valid_idx = [idx for idx in self.s2_indices if idx != -1] + + if valid_idx: + self.read_windows = cast(List[Tuple[Window, Interval]], read_windows) + self.write_windows = cast(List[Tuple[Window, Interval]], write_windows) + self.chip_slices = cast(List[Tuple[Interval, Interval, Interval]], chip_slices) + else: + self.read_windows, self.write_windows, self.chip_slices = [], [], [] + assert len(self.read_windows) == len(self.write_windows) == len(self.chip_slices) + + self.illuminance = self._get_illumination_array() + + def _adjust_roi_window(self, window: Window) -> Window: + width = self.chip_size.width + height = self.chip_size.height + if window.width >= width and window.height >= height: + return window + width = max(window.width, width) + height = max(window.height, height) + LOGGER.warning( + f"RoI has dimensions {window.width, window.height} and chip size is {self.chip_size}," + f" adjusting to {width, height}" + ) + diff_w = width - 
def _get_clear_ratio(self, cloud_mask_asset: AssetVibe) -> float:
    """
    Return the fraction of pixels in the RoI window whose cloud-mask value is 1.

    The mask is read through ``self._read_cloud_mask`` over ``self.roi_window``,
    so the value compared here is the relabelled one produced by that method,
    not the raw value stored in the asset.

    :param cloud_mask_asset: Asset holding the cloud mask raster.
    :return: Ratio in [0, 1] of mask pixels equal to 1.
    """
    mask = self._read_cloud_mask(
        cloud_mask_asset,
        # All-False placeholder: no pixel is flagged as nodata at this stage
        np.zeros(1, dtype=bool),
        self.roi_window,
    )
    # The mean of a boolean array is a fraction, hence float (not int)
    return float((mask == 1).mean())
def _get_illumination_array(self) -> NDArray[np.float32]:
    """
    Compute the illuminance array for each available product in the RoI.

    The result has one entry per band and timestep, with singleton spatial
    axes. Timesteps without data (or without enough cloudless data) are filled
    through interpolation. When illuminance normalization is disabled, or when
    no timestep is usable, an array of ones is returned instead.
    """
    out_shape = (len(self.s2_bands), self.time_length, 1, 1)
    if not self.normalize_illuminance:
        return np.ones(out_shape, dtype=np.float32)

    illuminance = np.zeros(out_shape, dtype=np.float32)
    mask_ar = np.zeros((1, self.time_length, 1, 1), dtype=np.float32)
    products = zip(
        self.s2_items.get_ordered_assets(),
        self.cloud_masks.get_ordered_assets(),
        self.s2_indices,
    )
    for s2_asset, cloud_mask_asset, index in products:
        # Skip products outside the time range (this includes the -1 marker)
        if not 0 <= index < self.time_length:
            continue
        x, m = self._read_s2(s2_asset, self.roi_window, cloud_mask_asset)
        m = m == 1
        clear_ratio = m.mean()
        if clear_ratio < self.min_clear_ratio:
            LOGGER.warning(
                "Discarding sentinel data for illumination computation with date "
                f"{self.s2_items.asset_time_range[s2_asset.id][0]} (index {index}) because "
                f"clear_ratio {clear_ratio:.1%} < threshold {self.min_clear_ratio:.1%}"
            )
            continue
        illuminance[:, index] = masked_average_illuminance(x, m.astype(np.float32))
        mask_ar[:, index] = 1

    if mask_ar.sum() == 0:
        LOGGER.warning("No cloudless day available for illuminance calculation.")
        return np.ones(out_shape, dtype=np.float32)
    return interpolate_illuminance(illuminance, mask_ar)
def _read_cloud_mask(
    self, cloud_mask_asset: AssetVibe, nodata: NDArray[np.bool_], window: Window
) -> NDArray[np.float32]:
    """
    Read a cloud mask and change the binary mask to the format expected by the model.

    The first band of the asset is read over ``window`` and relabelled:
    stored value 1 becomes 2, stored value 0 becomes 1, and every position
    selected by ``nodata`` becomes 2.

    :param cloud_mask_asset: Asset whose ``url`` points to the cloud mask raster.
    :param nodata: Boolean index selecting positions to be treated as cloud.
    :param window: Raster window to read.
    :return: The relabelled mask as float32.
    """
    # NOTE: the annotation uses np.bool_ because the np.bool8 alias was removed
    # in NumPy 2.0 and annotations are evaluated when the function is defined.
    cloud_mask = self._read_data(cloud_mask_asset.url, window, [0])
    # Use this masking for now for compatibility purposes
    # TODO: Change the model to receive a binary mask for Sentinel2 as well
    # Order matters: 1 -> 2 must happen before 0 -> 1, otherwise the freshly
    # written ones would be relabelled again.
    cloud_mask[cloud_mask == 1] = 2
    cloud_mask[cloud_mask == 0] = 1
    # Add nodata as cloud
    cloud_mask[nodata] = 2
    return cloud_mask.astype(np.float32)
+ This is done by selecting data inside the time range of the input + and inserting it in the correct time index + """ + x = None + mask = None + # Closed at beginning, open at ending + read_start, read_end = read_times + for item, mask_item, index in zip(items, mask_items, indices): + if read_start <= index < read_end: + chip_data, chip_mask = read_callback(item, read_window, mask_item) + if x is None: + x = np.zeros( + ( + chip_data.shape[0], + self.chip_size.time, + self.chip_size.height, + self.chip_size.width, + ), + dtype=np.float32, + ) + if mask is None: + mask = np.zeros( + (1, self.chip_size.time, self.chip_size.height, self.chip_size.width), + dtype=chip_mask.dtype, + ) + x[:, index - read_start] = chip_data + mask[:, index - read_start] = chip_mask + if x is None or mask is None: + start_time = (self.time_range[0] + timedelta(days=int(read_start))).isoformat() + end_time = (self.time_range[0] + timedelta(days=int(read_end))).isoformat() + raise RuntimeError( + f"Could not find any cloud-free data from dates {start_time} to {end_time}" + ) + return x, mask + + def __getitem__(self, idx: int) -> DatasetReturnType: + # Tensors are C x T x H x W + read_window, read_times = self.read_windows[idx] + + s2_data, s2_mask = self._get_data_array( + self.s2_items.get_ordered_assets(), + self.cloud_masks.get_ordered_assets(), # type: ignore + self.s2_indices, + read_times, + read_window, + self._read_s2, # type: ignore + ) + # Get data on where to write in the file + write_window, write_times = self.write_windows[idx] + # Which part of the predictions will be written + chip_slices = self.chip_slices[idx] + # Illuminance values for the chip + chip_illuminance = self.illuminance[:, read_times[0] : read_times[1]] + + # Data we feed into the network + chip_data = { + "S2": s2_data / (chip_illuminance + np.float32(EPS)), + "cloud_label": s2_mask, + "illuminance": chip_illuminance, + } + if self.s1_items is not None: + s1_sorted_assets = self.s1_items.get_ordered_assets() 
"""
Methods for computing, normalizing and interpolating illuminance of
multispectral raster timeseries.

Unless stated otherwise, data tensors are C x T x H x W and masks are
1 x T x H x W, with mask value 1 marking pixels that are used in the averages.
"""

from typing import Tuple

import numpy as np
from numpy.typing import NDArray

# Small constant added to denominators to avoid division by zero
EPS = 1e-10
# Minimum spatial mean of the mask for a timestep to count as available
MIN_CLEAR_RATIO = 0.01
# Minimum mean overlap with the anchor mask to use the relative method
MIN_OVERLAP = 0.01
# Default decay rate of the temporal interpolation weights
DEFAULT_LAMBDA_T = 0.5
SPATIAL_AXES = (-2, -1)


def extract_illuminance(
    x: NDArray[np.float32], mask: NDArray[np.float32]
) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
    """
    Split `x` into albedo and illuminance.

    The illuminance is the masked spatial average of `x`, and the albedo is
    `x` divided by that illuminance. `x` is not modified.

    :return: Tuple (albedo, illuminance).
    """
    illuminance = masked_average_illuminance(x, mask)
    albedo = x / (illuminance + EPS)
    return albedo, illuminance


def extract_illuminance_simple(
    x: NDArray[np.float32], mask: NDArray[np.float32]
) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
    """
    Normalize `x` by its masked average illuminance, interpolated over time.

    Timesteps whose mask has a spatial mean not above MIN_CLEAR_RATIO are
    considered unavailable and receive an interpolated illuminance.

    WARNING: `x` is modified in place and returned.

    :return: Tuple (normalized x, interpolated illuminance).
    """
    illuminance = masked_average_illuminance(x, mask)
    illuminance_mask = (mask.mean(axis=SPATIAL_AXES, keepdims=True) > MIN_CLEAR_RATIO).astype(
        np.float32
    )
    interp_illuminance = interpolate_illuminance(illuminance, illuminance_mask)
    x /= interp_illuminance + EPS  # Modify inplace to save memory
    return x, interp_illuminance


def masked_average_illuminance(
    x: NDArray[np.float32], mask: NDArray[np.float32]
) -> NDArray[np.float32]:
    """
    Average `x` over the spatial axes, weighting each pixel by `mask`.

    The spatial axes are kept as singleton dimensions. When the mask sums to
    zero, the result is zero because EPS is added to the denominator.
    """
    # x: C x T x H x W
    # mask: 1 x T x H x W
    # output: C x T x 1 x 1
    numerator = (x * mask).sum(axis=SPATIAL_AXES, keepdims=True)
    denominator = mask.sum(axis=SPATIAL_AXES, keepdims=True)
    return numerator / (denominator + EPS)


def extract_illuminance_relative(
    x: NDArray[np.float32], mask: NDArray[np.float32]
) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
    """
    Normalize `x` by an illuminance computed relative to an anchor timestep.

    The anchor is the available timestep with the largest mask coverage. For
    every other available timestep, the illuminance is the anchor illuminance
    scaled by the ratio of averages over pixels valid in both the timestep and
    the anchor. Timesteps whose overlap with the anchor is not above
    MIN_OVERLAP fall back to the plain masked average. Unavailable timesteps
    are interpolated.

    WARNING: `x` is modified in place and returned.

    :return: Tuple (normalized x, interpolated illuminance).
    :raises ValueError: If no timestep has a clear ratio above MIN_CLEAR_RATIO.
    """
    illuminance_mask = (mask.mean(axis=SPATIAL_AXES, keepdims=True) > MIN_CLEAR_RATIO).astype(
        np.float32
    )

    # Relevant inputs for which we have data
    # We'll interpolate the rest
    # Index explicitly instead of squeezing: squeeze would collapse to a 0-d
    # array when there is a single timestep and break the boolean indexing.
    available = illuminance_mask[0, :, 0, 0].astype(bool)
    if not available.any():
        raise ValueError(
            "Cannot compute relative illuminance: no timestep has a clear ratio "
            f"above {MIN_CLEAR_RATIO}"
        )
    x_s = x[:, available]
    mask_s = mask[:, available]

    # find the anchor image
    clear_percentage = mask_s.sum(axis=0).mean(axis=SPATIAL_AXES)
    t_anchor = np.argmax(clear_percentage)

    # compute the anchor illuminance
    anchor_x = x_s[:, t_anchor : t_anchor + 1]
    anchor_mask = mask_s[:, t_anchor : t_anchor + 1]
    anchor_illuminance = masked_average_illuminance(anchor_x, anchor_mask)

    # Compute relative illuminance
    # Pixels that are valid both in the timestep and in the anchor
    ratio_mask = ((mask_s + anchor_mask) == 2.0).astype(np.float32)
    # Fall back to the old method if there is not enough overlap
    overlap_mask = ratio_mask.mean(axis=(0, *SPATIAL_AXES)) > MIN_OVERLAP
    i_old = masked_average_illuminance(x_s[:, ~overlap_mask], mask_s[:, ~overlap_mask])
    # New method for the rest
    relative_illuminance = masked_average_illuminance(
        x_s[:, overlap_mask], ratio_mask[:, overlap_mask]
    ) / (masked_average_illuminance(anchor_x, ratio_mask[:, overlap_mask]) + EPS)
    # Compute final illuminance
    i_new = anchor_illuminance * relative_illuminance

    available_idx = np.where(available)[0]
    illuminance = np.zeros((*x.shape[:2], 1, 1), dtype=np.float32)
    illuminance[:, available_idx[~overlap_mask]] = i_old
    illuminance[:, available_idx[overlap_mask]] = i_new
    interp_illuminance = interpolate_illuminance(illuminance, illuminance_mask)
    x /= interp_illuminance + EPS  # Modify inplace to save memory
    return x, interp_illuminance


def add_illuminance(
    albedo: NDArray[np.float32], illuminance: NDArray[np.float32]
) -> NDArray[np.float32]:
    """Recombine albedo and illuminance by multiplying them."""
    return albedo * illuminance


def interpolate_illuminance(
    illuminance: NDArray[np.float32], mask: NDArray[np.float32], lambda_t: float = DEFAULT_LAMBDA_T
) -> NDArray[np.float32]:
    """
    Fill illuminance at unavailable timesteps with a weighted temporal average.

    Each available timestep contributes with weight exp(-lambda_t * |dt|),
    where dt is the distance in timestep indices. Values where `mask` is 1 are
    returned unchanged; values where `mask` is 0 are replaced by the weighted
    average of the available ones.

    :param illuminance: C x T x 1 x 1 array of illuminance values.
    :param mask: 1 x T x 1 x 1 array, 1 where the illuminance is available.
    :param lambda_t: Decay rate of the weights.
    """
    C, T, _, _ = illuminance.shape
    t_tensor = np.arange(T, dtype=np.float32)
    delta_t_matrix = np.abs(t_tensor[None] - t_tensor[:, None])
    weight = np.exp(-lambda_t * delta_t_matrix)
    # Only available timesteps may contribute to the numerator. Otherwise
    # whatever is stored at unavailable positions leaks into the average while
    # being absent from the normalization below.
    available_illuminance = illuminance * mask
    illuminance_sum = (weight @ available_illuminance.reshape((C, T, -1))).reshape(
        illuminance.shape
    )
    mask_sum = (weight @ mask.reshape((1, T, -1))).reshape(mask.shape)
    weighted_illuminance = illuminance_sum / (mask_sum + EPS)
    return weighted_illuminance * (1 - mask) + available_illuminance
class DampedInterpolation(nn.Module):
    """
    This algorithm implements interpolation through minimizing an objective function, namely:

    F(X) = sum_t || (X_t - S2_t) .* M_t ||_F^2 + alpha sum_t ||X_{t+1}-X_t||_F^2
         = || (X - S2) .* M ||_F^2 + alpha || Delta * X ||_F^2

    The gradient is
    F'(X) = 2 * M**2 .* (X-S2) + 2 * alpha * (Delta^T @ Delta) @ X
    Note that M**2=M when M represents a 0/1 cloud-mask.
    In the case of cloud-probabilities it's more complex.

    Using algorithm from SpaceEye paper, the fixed-point iteration is:
    X <== (I+alpha*Delta^T*Delta)^{-1} ((M.*S2)+(1-M).*X)

    Every `check_interval` iterations, the relative mean absolute change between
    consecutive iterates is compared against `tol` and the iteration stops when
    it falls below it. At most `max_iter` iterations are run.

    Internally the B x C x T x H x W tensors are viewed as T x (B*C*H*W)
    matrices, so that the T x T matrix above acts along the time axis.

    :param num_bands: Number of bands, used to build the block-diagonal `delta` matrix.
    :param time_window: Number of timesteps T.
    :param damping_factor: Weight alpha of the temporal smoothness term, must be > 0.
    :param tol: Threshold on the relative change used as stopping criterion.
    :param max_iter: Maximum number of iterations.
    :param check_interval: Number of iterations between convergence checks.
    """

    # Registered as buffers in __init__; declared here for type checkers
    delta: torch.Tensor
    w: torch.Tensor

    def __init__(
        self,
        num_bands: int,
        time_window: int,
        damping_factor: float = 0.1,
        tol: float = 1e-3,
        max_iter: int = 200,
        check_interval: int = 5,
    ):
        super().__init__()
        self.num_bands = num_bands
        self.time_window = time_window
        self.damping_factor = damping_factor
        self.tol = tol
        self.max_iter = max_iter
        self.check_interval = check_interval
        assert self.damping_factor > 0
        d = generate_delta_matrix(self.time_window)
        # Buffers follow the module on .to()/.cuda(), avoiding device mismatches
        # in forward. They are non-persistent so that the state dict is unchanged.
        self.register_buffer("delta", torch.kron(torch.eye(self.num_bands), d), persistent=False)
        self.register_buffer(
            "w",
            torch.linalg.inv(torch.eye(self.time_window) + damping_factor * (d.T @ d)),
            persistent=False,
        )

    def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
        """
        Interpolate the "S2" input over positions where "cloud_label" is not 1.

        :param inputs: Dictionary with "S2" (B x C x T x H x W) and
            "cloud_label" (B x 1 x T x H x W) tensors.
        :return: Interpolated tensor with the same shape as "S2".
        """
        s2, m = inputs["S2"], inputs["cloud_label"] == 1
        x = s2.clone()
        m = m.to(x)
        m_: torch.Tensor = 1 - m
        # Initialize masked-out positions with the per-pixel average over time
        pixel_avg = masked_time_average(x, m)
        x = x * m + pixel_avg * m_
        b, c, _, h, _ = s2.shape
        s2 = rearrange(s2, "b c t h w -> t (b c h w)").contiguous()
        x = rearrange(x, "b c t h w -> t (b c h w)").contiguous()
        m = repeat(m, "b 1 t h w -> t (b c h w)", c=c).contiguous()
        m_ = repeat(m_, "b 1 t h w -> t (b c h w)", c=c).contiguous()
        # The observed term does not change between iterations
        f = self.w @ (m * s2)
        for i in range(self.max_iter):
            x1 = f + self.w @ (m_ * x)
            if not (i % self.check_interval) and (
                (x1 - x).abs().mean() / (x1.abs().mean() + EPS) < self.tol
            ):
                return rearrange(x1, "t (b c h w) -> b c t h w", b=b, c=c, h=h)
            x = x1
        return rearrange(x, "t (b c h w) -> b c t h w", b=b, c=c, h=h)
def verify_processing_level(
    items: Sequence[Sentinel2Product], processing_level: S2ProcessingLevel, prefix: str = ""
) -> None:
    """
    Check that all items have the expected processing level.

    :param items: Products to be checked.
    :param processing_level: Processing level every item is expected to have.
    :param prefix: Optional text placed at the start of the error message.
    :raises ValueError: If any item has a different processing level. The
        message lists the distinct offending levels.
    """
    invalid = {
        item.processing_level for item in items if item.processing_level != processing_level
    }
    if invalid:
        # Avoid a stray leading space when no prefix is given
        expected = f"{prefix} expected" if prefix else "Expected"
        # Sorted so that the message does not depend on set iteration order
        raise ValueError(
            f"{expected} items with processing level "
            f"{processing_level}. Found items with processing level: {','.join(sorted(invalid))}"
        )
def convert_zonal_stats_to_timeseries(stats: Sequence[Stats]) -> DataFrame:
    """
    Convert a sequence of zonal statistics into a dataframe indexed by date.

    The dataframe has one row per entry and one column for each remaining
    field of `Stats`. An empty sequence yields an empty dataframe with the
    same index name and columns.
    """
    # Column names are given explicitly because they cannot be inferred from an
    # empty sequence, in which case setting the index would raise KeyError.
    df = pd.DataFrame(list(stats), columns=list(Stats._fields))
    df.set_index("date", drop=True, inplace=True)  # type: ignore

    return df
00000000..7307eb52 --- /dev/null +++ b/src/vibe_server/setup.py @@ -0,0 +1,37 @@ +from setuptools import find_packages, setup + +setup( + name="vibe_server", + version="0.0.1", + author="Microsoft", + author_email="terravibes@microsoft.com", + description="TerraVibes Geospatial Platform Package - server package.", + license="Proprietary", + keywords="terravibes geospatial", + packages=find_packages(exclude=["tests*"]), + python_requires="~=3.8", + install_requires=[ + "vibe-core", + "vibe-common", + "httpx~=0.24.1", + "fastapi_utils~=0.2.1", + "grpcio~=1.53.0", + "dapr==1.13.0", + "dapr-ext-grpc~=1.12.0", + "cloudevents~=1.2", + "fastapi~=0.109.1", + "fastapi-versioning~=0.10.0", + "requests~=2.32.0", + "starlette~=0.36.2", + "uvicorn~=0.13.4", + "urllib3~=1.26.8", + "psutil~=5.9.0", + ], + entry_points={ + "console_scripts": [ + "vibe-orchestrator = vibe_server.orchestrator:main_sync", + "vibe-server = vibe_server.server:main_sync", + "vibe-sniffer = vibe_server.sniffer:main", + ] + }, +) diff --git a/src/vibe_server/tests/conftest.py b/src/vibe_server/tests/conftest.py new file mode 100644 index 00000000..48ec6ac8 --- /dev/null +++ b/src/vibe_server/tests/conftest.py @@ -0,0 +1,74 @@ +from dataclasses import asdict +from typing import Any, Dict + +import pytest + +from vibe_common.messaging import WorkMessage +from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson +from vibe_dev.testing import anyio_backend +from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir +from vibe_dev.testing.workflow_fixtures import ( + SimpleStrData, + SimpleStrDataType, + workflow_execution_message, + workflow_run_config, +) + + +@pytest.fixture +def run_config(workflow_execution_message: WorkMessage) -> Dict[str, Any]: + run_id = workflow_execution_message.header.run_id + spatio_temporal_json = { + "end_date": "2019-02-03T00:00:00", + "geojson": { + "features": [ + { + "geometry": { + "coordinates": [ + [ + [-88.068487, 
class SomeGraph(Graph[int, int]):
    """Graph of integer nodes built from an adjacency mapping; all edges are labelled 1."""

    def __init__(self, data: Dict[int, List[int]]):
        super().__init__()

        # Every key is added as a node before any edge is created
        for source in data.keys():
            self.add_node(source)
        edges = [
            (source, destination)
            for source, destinations in data.items()
            for destination in destinations
        ]
        for source, destination in edges:
            self.add_edge(source, destination, 1)
int.from_bytes("🌎".encode("utf-8"), "little"): [ + int.from_bytes("🎶".encode("utf-8"), "little") + ], + int.from_bytes("🎶".encode("utf-8"), "little"): [ + int.from_bytes("🔙".encode("utf-8"), "little"), + int.from_bytes("🔚".encode("utf-8"), "little"), + ], + int.from_bytes("🔙".encode("utf-8"), "little"): [ + int.from_bytes("✅".encode("utf-8"), "little") + ], + int.from_bytes("🔚".encode("utf-8"), "little"): [ + int.from_bytes("✅".encode("utf-8"), "little") + ], + } + ) + + +@pytest.fixture +def empty_graph() -> SomeGraph: + return SomeGraph({}) + + +def test_topological_sort_on_empty_graph(empty_graph: SomeGraph): + assert list(empty_graph.topological_sort()) == [] + + +def test_cycle_detection_on_empty_graph(empty_graph: SomeGraph): + assert not empty_graph.has_cycle() + + +def test_loopy_graph_has_cycle(loopy_graph: SomeGraph): + assert loopy_graph.has_cycle() + + +def test_topological_sort_on_a_loopy_graph(loopy_graph: SomeGraph): + with pytest.raises(ValueError): + loopy_graph.topological_sort() + + +def test_topological_sort_on_a_normal_graph(a_normal_graph: SomeGraph): + sort = list(a_normal_graph.topological_sort()) + assert sort[0] == [0] + assert sort[1] == [1, 4] + assert sort[2] == [2, 5, 6, 7] + assert sort[3] == [3] + + +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_topological_sort_on_a_simple_graph(a_simple_graph: SomeGraph): + sort = list(a_simple_graph.topological_sort()) + assert sort[0] == [int.from_bytes("🌎".encode("utf-8"), "little")] + assert sort[1] == [int.from_bytes("🎶".encode("utf-8"), "little")] + assert set(sort[2]) == set( + [ + int.from_bytes("🔙".encode("utf-8"), "little"), + int.from_bytes("🔚".encode("utf-8"), "little"), + ] + ) + assert sort[3] == [int.from_bytes("✅".encode("utf-8"), "little")] + + +def test_topological_sort_on_random_graphs(): + with pytest.warns(UserWarning): + for _ in range(42): + a = random.randint(-999999, 999999) + b = random.randint(-999999, 999999) + c = random.randint(-999999, 999999) + graph = 
def test_local_href_handler_parse_item(one_item_one_asset: Item, tmp_path: Path):
    """Check that every asset href of a parsed item is an absolute path."""
    local_href_handler = LocalHrefHandler(tmp_path)
    new_item = local_href_handler._parse_item(one_item_one_asset)
    assets = new_item.get_assets()
    # Make sure the loop below is not vacuous
    assert assets
    for asset in assets.values():
        # `Path.absolute` is a bound method and therefore always truthy;
        # `is_absolute()` is the actual check.
        assert Path(asset.href).is_absolute()
local_href_handler = LocalHrefHandler(tmp_path) + + asset = Asset(href="../../../assets/asdf/test.txt") + local_href_handler._update_asset(asset) + p = tmp_path / "asdf" / "test.txt" + assert asset.href == str(p) + assert os.path.isabs(asset.href) + + asset = Asset(href=".././/../assets/asdf/test.txt") + local_href_handler._update_asset(asset) + p = tmp_path / "asdf" / "test.txt" + assert asset.href == str(p) + + asset = Asset(href="../../assets/asdf/blah/../test.txt") + local_href_handler._update_asset(asset) + p = tmp_path / "asdf" / "test.txt" + assert asset.href == str(p) + assert ".." not in asset.href + + asset = Asset(href="/test.txt") + local_href_handler._update_asset(asset) + p = tmp_path / "test.txt" + assert asset.href == str(p) + + +@pytest.fixture +def run_config_with_output( + one_item_one_asset: Item, fake_op_name: str, workflow_run_config: Dict[str, Any] +) -> RunConfigUser: + provider = TerravibesProvider(LocalHrefHandler("/tmp")) + _, run_config = provider.create_new_run(RunConfigInput(**workflow_run_config), []) + run_config.set_output({fake_op_name: [serialize_stac(one_item_one_asset)]}) + return RunConfigUser.from_runconfig(run_config) + + +def test_href_handler_handle( + run_config_with_output: RunConfigUser, fake_op_name: str, fake_asset_name: str, tmp_path: Path +): + local_href_handler = LocalHrefHandler(tmp_path) + + original_item = cast(List[Dict[str, Any]], run_config_with_output.output[fake_op_name])[0] + original_href = original_item["assets"][fake_asset_name]["href"] + original_path = str( + local_href_handler.assets_dir / Path(original_href).parent.name / Path(original_href).name + ) + + local_href_handler.handle(run_config_with_output) + + parsed_item = cast(List[Dict[str, Any]], run_config_with_output.output[fake_op_name])[0] + parsed_path = parsed_item["assets"][fake_asset_name]["href"] + + assert parsed_path == original_path diff --git a/src/vibe_server/tests/test_op_parallelism.py b/src/vibe_server/tests/test_op_parallelism.py 
new file mode 100644 index 00000000..f7e4cf6f --- /dev/null +++ b/src/vibe_server/tests/test_op_parallelism.py @@ -0,0 +1,94 @@ +from typing import Any, Awaitable, Callable, Dict, List, NamedTuple, cast +from unittest.mock import MagicMock, patch +from uuid import UUID, uuid4 + +import pytest + +from vibe_core.data.core_types import DataVibe, OpIOType +from vibe_server.workflow.runner.runner import OpParallelism +from vibe_server.workflow.workflow import EdgeLabel, EdgeType, GraphNodeType, InputFanOut + + +class OpSpecMock: + def __init__(self, inputs: OpIOType): + self.inputs_spec = { + name: List[DataVibe] if isinstance(data, list) else DataVibe + for name, data in inputs.items() + } + + +class NodeMock(NamedTuple): + name: str + spec: OpSpecMock + + +@pytest.fixture +def merge_input() -> List[OpIOType]: + return [{"something": [{"int": i}]} for i in range(10)] + + +@pytest.fixture +def exploder_input() -> OpIOType: + return {"to": [{"something": i} for i in range(10)], "other": {"another": "thing"}} + + +def test_parallelism_merges(merge_input: List[Dict[str, Any]]): + the_edge = EdgeLabel("from", "to", EdgeType.scatter) + none = cast(Callable[[GraphNodeType, OpIOType, UUID, int], Awaitable[OpIOType]], None) + parallelism = OpParallelism([the_edge], cast(GraphNodeType, None), none) + out = parallelism.fan_in(merge_input) + assert len(out) == 1 + assert "something" in out + assert len(out["something"]) == 10 + + +def test_parallelism_explodes_inputs(exploder_input: OpIOType): + op_mock = cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) + the_edge = EdgeLabel("from", "to", EdgeType.scatter) + none = cast(Callable[[GraphNodeType, OpIOType, UUID, int], Awaitable[OpIOType]], None) + parallelism = OpParallelism([the_edge], op_mock, none) + exploded_inputs = list(parallelism.fan_out(exploder_input)) + assert len(exploded_inputs) == 10 + + +@pytest.mark.anyio +async def test_parallelism_runs(exploder_input: OpIOType): + async def run_task(_: 
GraphNodeType, input: OpIOType, __: UUID, ___: int) -> OpIOType: + return {"out_" + k: v for k, v in input.items()} + + op_mock = cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) + the_edge = EdgeLabel("from", "to", EdgeType.scatter) + parallelism = OpParallelism([the_edge], op_mock, run_task) + out = parallelism.fan_in(await parallelism.run(exploder_input, uuid4())) + + assert "out_to" in out + assert "out_other" in out + assert len(out["out_to"]) == len(out["out_other"]) == 10 + + +@pytest.mark.anyio +async def test_parallelism_fails(exploder_input: OpIOType): + async def run_task(_: GraphNodeType, input: OpIOType, __: UUID, ___: int) -> OpIOType: + raise RuntimeError(":-(") + + op_mock = cast(GraphNodeType, NodeMock("mock", OpSpecMock(exploder_input))) + the_edge = EdgeLabel("from", "to", EdgeType.scatter) + parallelism = OpParallelism([the_edge], op_mock, run_task) + + with pytest.raises(RuntimeError): + await parallelism.run(exploder_input, uuid4()) + + +@patch.object(OpParallelism, "fan_out") +@patch("pydantic.fields.ModelField.validate", side_effect=lambda *args, **_: (args[1], None)) +@pytest.mark.anyio +async def test_parallelism_input_fan_out(_: MagicMock, fan_out: MagicMock): + run_task = MagicMock() + node = InputFanOut("test", DataVibe) + parallelism = OpParallelism([], GraphNodeType("test", node), run_task) + with patch.object(OpParallelism, "fan_in") as fan_in: + outputs = await parallelism.run(cast(OpIOType, {node.input_port: "👍"}), uuid4()) + fan_in.assert_not_called() + fan_out.assert_not_called() + run_task.assert_not_called() + assert parallelism.fan_in(outputs) == {node.output_port: "👍"} diff --git a/src/vibe_server/tests/test_orchestrator.py b/src/vibe_server/tests/test_orchestrator.py new file mode 100644 index 00000000..936bd61c --- /dev/null +++ b/src/vibe_server/tests/test_orchestrator.py @@ -0,0 +1,430 @@ +from asyncio.queues import Queue +from dataclasses import asdict +from datetime import datetime +from typing import 
Any, Dict, Optional, Tuple, cast +from unittest.mock import AsyncMock, Mock, patch +from uuid import UUID +from uuid import uuid4 as uuid + +import pytest +from cloudevents.sdk.event import v1 + +from vibe_common.constants import STATUS_PUBSUB_TOPIC, WORKFLOW_REQUEST_PUBSUB_TOPIC +from vibe_common.dropdapr import TopicEventResponseStatus +from vibe_common.messaging import ( + ErrorContent, + ExecuteReplyContent, + MessageHeader, + MessageType, + OpStatusType, + WorkflowExecutionContent, + WorkflowExecutionMessage, + WorkMessage, + WorkMessageBuilder, + build_work_message, + encode, + gen_traceparent, +) +from vibe_common.schemas import CacheInfo +from vibe_common.statestore import StateStore +from vibe_core.data.core_types import OpIOType +from vibe_core.data.json_converter import dump_to_json +from vibe_core.data.utils import StacConverter, is_container_type, serialize_stac +from vibe_core.datamodel import RunConfig, RunDetails, RunStatus, SpatioTemporalJson +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path # noqa +from vibe_dev.testing.workflow_fixtures import THE_DATAVIBE +from vibe_server.orchestrator import Orchestrator, WorkflowRunManager +from vibe_server.workflow.runner import WorkflowChange +from vibe_server.workflow.runner.remote_runner import RemoteWorkflowRunner +from vibe_server.workflow.spec_parser import WorkflowParser +from vibe_server.workflow.workflow import GraphNodeType, Workflow + + +def make_test_message( + workflow_name: str, + params: Optional[Dict[str, Any]], + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa +) -> WorkflowExecutionMessage: + header = MessageHeader( + type=MessageType.workflow_execution_request, + run_id=uuid(), + ) + workflow_dict = asdict( + WorkflowParser.parse( + get_fake_workflow_path(workflow_name), + ops_dir=fake_ops_dir, + workflows_dir=fake_workflows_dir, + parameters_override=params, + ) + ) + content = WorkflowExecutionContent( + input={}, + workflow=workflow_dict, + 
parameters=params, + ) + return cast(WorkflowExecutionMessage, build_work_message(header, content)) + + +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_orchestrator_add_output(store: Mock, retrieve: Mock, run_config: Dict[str, Any]): + retrieve.side_effect = lambda _: run_config + output = cast(OpIOType, {"some-op": {"data": "fake"}}) + statestore = StateStore() + await WorkflowRunManager.add_output_to_run(run_config["id"], output, statestore) + run_config["output"] = encode(dump_to_json(output)) + store.assert_called_with(run_config["id"], RunConfig(**run_config)) + + +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_orchestrator_fail_workflow(store: Mock, retrieve: Mock, run_config: Dict[str, Any]): + retrieve.side_effect = lambda _: run_config + orchestrator = Orchestrator() + reason = "fake reason" + await orchestrator.fail_workflow(run_config["id"], reason) + run_config["details"]["status"] = RunStatus.failed + run_config["details"]["reason"] = reason + assert store.mock_calls[0][1][1].details.status == RunStatus.failed + assert store.mock_calls[0][1][1].details.reason == reason + + +def to_cloud_event(msg: WorkMessage) -> v1.Event: + ce = v1.Event() + msgdict = msg.to_cloud_event("test") + for key in msgdict: + if hasattr(ce, key): + try: + setattr(ce, key, msgdict[key]) + except Exception: + pass + ce.data = ce.data.encode("ascii") # type: ignore + return ce + + +def test_run_config_fails_on_invalid_inputs(): + rc = RunConfig( + name="name", + workflow="fake", + parameters=None, + user_input=SpatioTemporalJson( + datetime.now(), + datetime.now(), + {}, + ), + id=uuid(), + details=RunDetails(status=RunStatus.pending, start_time=None, end_time=None, reason=None), + task_details={}, + spatio_temporal_json=None, + ) + for value in float("nan"), float("inf"), float("-inf"): + with 
pytest.raises(ValueError): + rc.set_output({"a": value}) # type: ignore + + +@pytest.mark.anyio +async def test_orchestrator_update_response(): + reply_content = ExecuteReplyContent( + cache_info=CacheInfo("test_op", "1.0", {}, {}), status=OpStatusType.done, output={} + ) + header = MessageHeader(type=MessageType.execute_reply, run_id=uuid()) + reply = build_work_message(header=header, content=reply_content) + orchestrator = Orchestrator() + orchestrator.inqueues[str(header.run_id)] = Queue() + topic_reply = await orchestrator.handle_update_workflow_status( + STATUS_PUBSUB_TOPIC, to_cloud_event(reply) + ) + assert topic_reply.status == TopicEventResponseStatus.success["status"] + + +@pytest.mark.anyio +async def test_orchestrator_update_error_response(): + reply_content = ErrorContent(status=OpStatusType.failed, ename="", evalue="", traceback=[]) + header = MessageHeader(type=MessageType.error, run_id=uuid()) + reply = build_work_message(header=header, content=reply_content) + orchestrator = Orchestrator() + orchestrator.inqueues[str(header.run_id)] = Queue() + topic_reply = await orchestrator.handle_update_workflow_status( + STATUS_PUBSUB_TOPIC, to_cloud_event(reply) + ) + assert topic_reply.status == TopicEventResponseStatus.success["status"] + + +@pytest.mark.anyio +async def test_orchestrator_update_response_fails_as_message_not_in_queue(): + orchestrator = Orchestrator() + ack_reply = WorkMessageBuilder.build_ack_reply(gen_traceparent(uuid())) + topic_reply = await orchestrator.handle_update_workflow_status( + STATUS_PUBSUB_TOPIC, to_cloud_event(ack_reply) + ) + assert topic_reply.status == TopicEventResponseStatus.drop["status"] + + +@pytest.mark.anyio +async def test_orchestrator_update_response_fails_with_invalid_message( + workflow_execution_message: WorkMessage, +): + orchestrator = Orchestrator() + topic_reply = await orchestrator.handle_update_workflow_status( + STATUS_PUBSUB_TOPIC, to_cloud_event(workflow_execution_message) + ) + assert 
topic_reply.status == TopicEventResponseStatus.drop["status"] + + +@pytest.mark.anyio +async def test_orchestrator_workflow_submission_rejects(): + request = WorkMessageBuilder.build_error(gen_traceparent(uuid()), "", "", []) + orchestrator = Orchestrator() + topic_reply = await orchestrator.handle_manage_workflow_event( + WORKFLOW_REQUEST_PUBSUB_TOPIC, to_cloud_event(request) + ) + assert topic_reply.status == TopicEventResponseStatus.drop["status"] + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning") +@pytest.mark.anyio +async def test_orchestrator_workflow_submission_accepts(): + spec = WorkflowParser._load_workflow(get_fake_workflow_path("item_gather")) + request = WorkMessageBuilder.build_workflow_request(uuid(), spec, {}, {}) + orchestrator = Orchestrator() + reply = await orchestrator.handle_manage_workflow_event( + WORKFLOW_REQUEST_PUBSUB_TOPIC, to_cloud_event(request) + ) + assert reply.status == TopicEventResponseStatus.success["status"] + + +@patch("vibe_common.statestore.StateStore.retrieve_bulk") +@patch("vibe_common.statestore.StateStore.retrieve") +@pytest.mark.anyio +async def test_orchestrator_startup_sees_no_runs(retrieve: Mock, retrieve_bulk: Mock): + retrieve.return_value = [] + retrieve_bulk.return_value = [] + orchestrator = Orchestrator() + assert await orchestrator.get_unfinished_workflows() == [] + retrieve_bulk.assert_called_once_with([]) + + +@patch("vibe_common.statestore.StateStore.retrieve") +@pytest.mark.anyio +async def test_orchestrator_startup_dapr_not_stared(retrieve: Mock): + retrieve.side_effect = Exception("Random error when retrieving runs") + with pytest.raises(RuntimeError): + orchestrator = Orchestrator() + await orchestrator._resume_workflows() + + +@patch("vibe_common.statestore.StateStore.retrieve_bulk") +@patch("vibe_common.statestore.StateStore.retrieve") +@pytest.mark.anyio +async def test_orchestrator_startup_sees_no_unfinished_runs( + retrieve: Mock, retrieve_bulk: Mock, 
run_config: Dict[str, Any] +): + retrieve.return_value = [run_config["id"]] + run_config["details"]["status"] = RunStatus.done + retrieve_bulk.return_value = [run_config] + orchestrator = Orchestrator() + assert await orchestrator.get_unfinished_workflows() == [] + retrieve_bulk.assert_called_once_with([run_config["id"]]) + + +@patch("vibe_common.statestore.StateStore.retrieve_bulk") +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@patch("vibe_server.workflow.runner.task_io_handler.WorkflowIOHandler.map_output") +@patch("vibe_server.workflow.runner.task_io_handler.TaskIOHandler.retrieve_sinks") +@patch("vibe_server.workflow.runner.remote_runner.RemoteWorkflowRunner._run_ops") +@pytest.mark.anyio +async def test_orchestrator_startup_sees_unfinished_runs( + _run_ops: AsyncMock, + retrieve_sinks: Mock, + map_output: Mock, + store: Mock, + retrieve: Mock, + retrieve_bulk: Mock, + run_config: Dict[str, Any], + fake_ops_dir: str, + fake_workflows_dir: str, +): + first = True + + def retrieve_fun(_: str): + nonlocal first + if first: + first = False + return run_config["id"] + return run_config + + _run_ops.return_value = None + retrieve_sinks.return_value = None + map_output.return_value = None + retrieve.side_effect = retrieve_fun + retrieve_bulk.return_value = [run_config, run_config, run_config] + build_return_value = Workflow.build( + get_fake_workflow_path("single_and_parallel"), fake_ops_dir, fake_workflows_dir + ) + + with patch("vibe_server.workflow.workflow.Workflow.build", return_value=build_return_value): + orchestrator = Orchestrator() + await orchestrator._resume_workflows() + retrieve_bulk.assert_called_once_with(run_config["id"]) + _run_ops.assert_called() + + +@patch("vibe_server.orchestrator.WorkflowStateUpdate.__call__") +@pytest.mark.anyio +async def test_orchestrator_cancel_run( + state_update: Mock, + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa +): + workflow = Workflow.build( 
+ get_fake_workflow_path("str_input"), + fake_ops_dir, + fake_workflows_dir, + ) + + message = WorkMessageBuilder.build_workflow_request( + uuid(), + asdict(workflow.workflow_spec), + None, + {k: [{}] for k in workflow.inputs_spec}, + ) + + cancellation = WorkMessageBuilder.build_workflow_cancellation(message.run_id) + orchestrator = Orchestrator(ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + await orchestrator.manage_workflow(message) + assert len(orchestrator._workflow_management_tasks.values()) == 1 + wf = list(orchestrator._workflow_management_tasks.values())[0] + + await orchestrator.manage_workflow(cancellation) + await wf.task + assert wf.is_cancelled + assert wf.runner + assert wf.runner.is_cancelled + state_update.assert_any_call(WorkflowChange.WORKFLOW_CANCELLED) + + +@pytest.mark.parametrize("params", [None, {"new": "from_message"}]) +@pytest.mark.anyio +async def test_build_workflow_with_params( + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa + params: Optional[Dict[str, Any]], +): + msg = make_test_message("resolve_params", params, fake_ops_dir, fake_workflows_dir) + manager = WorkflowRunManager( + None, # type: ignore + msg, + 1, # type: ignore + "", + "", + "", + fake_ops_dir, + fake_workflows_dir, + ) + workflow, _ = manager.build_workflow({"input": None}) # type: ignore + expected = workflow.workflow_spec.default_parameters["new"] if params is None else params["new"] + assert workflow.workflow_spec.parameters["new"] == expected + + +@pytest.mark.parametrize( + "wf_params", [("resolve_params", {"made_up": 1}), ("list_list", {"any": "!"})] +) +@patch("vibe_server.orchestrator.update_workflow") +@pytest.mark.anyio +async def test_build_workflow_invalid_params_update_status( + update: Mock, + wf_params: Tuple[str, Dict[str, Any]], + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa +): + msg = make_test_message( + wf_params[0], {}, fake_ops_dir=fake_ops_dir, fake_workflows_dir=fake_workflows_dir + ) + 
msg.content.parameters = wf_params[1] + manager = WorkflowRunManager( + {}, + msg, + 1, # type: ignore + "", + "", + "", + fake_ops_dir, + fake_workflows_dir, # type: ignore + ) + with pytest.raises(ValueError): + await manager.task + update.assert_called_once() + run_id, _, status, _ = update.call_args[0] + assert run_id == str(msg.header.run_id) + assert status == RunStatus.failed + + +@patch.object(RemoteWorkflowRunner, "_build_and_process_request", autospec=True) +@patch("vibe_common.statestore.StateStore.retrieve") +@patch("vibe_common.statestore.StateStore.store") +@pytest.mark.anyio +async def test_run_workflow_that_will_fail( + store: Mock, + retrieve: Mock, + bpr: Mock, + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa + run_config: Dict[str, Any], +): + converter = StacConverter() + + workflow = Workflow.build( + get_fake_workflow_path("custom_indices_structure"), + fake_ops_dir, + fake_workflows_dir, + ) + + message = WorkMessageBuilder.build_workflow_request( + uuid(), + asdict(workflow.workflow_spec), + None, + {k: serialize_stac(converter.to_stac_item([THE_DATAVIBE])) for k in workflow.inputs_spec}, + ) + + def mock_build_and_process_request( + self: Any, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int + ) -> OpIOType: + self._handle_ack_message(op.name, subtask_idx) + if op.name.startswith("ndvi"): + raise RuntimeError("Received unsupported message error. Aborting execution.") + return { + k: serialize_stac( + converter.to_stac_item( + # This should work just fine, as `DataVibe` inherits from `BaseVibe`, + # but pyright doesn't like it. 
I think the issue pyright is having + # is because we use `__init_subclass__` in a dataclass, and it is + # getting confused + [THE_DATAVIBE] if is_container_type(v) else THE_DATAVIBE # type: ignore + ) + ) + for k, v in op.spec.output_spec.items() + } + + def store_side_effect(key: str, obj: Any, _: Optional[str] = None): # type: ignore + nonlocal run_config + run_config = obj + + def retrieve_side_effect(key: str, _: Optional[str] = None): # type: ignore + return run_config + + store.side_effect = store_side_effect + retrieve.side_effect = retrieve_side_effect + bpr.side_effect = mock_build_and_process_request + + orchestrator = Orchestrator(ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + + with pytest.raises(RuntimeError): + await orchestrator.manage_workflow(message) + wf = list(orchestrator._workflow_management_tasks.values())[0] + await wf.task + + assert run_config["details"]["status"] == RunStatus.failed diff --git a/src/vibe_server/tests/test_parameter_resolver.py b/src/vibe_server/tests/test_parameter_resolver.py new file mode 100644 index 00000000..7889d9b8 --- /dev/null +++ b/src/vibe_server/tests/test_parameter_resolver.py @@ -0,0 +1,125 @@ +import os + +from vibe_common.schemas import OperationParser +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.parameter import Parameter, ParameterResolver +from vibe_server.workflow.spec_parser import WorkflowParser + + +def test_parameter_defaults_from_child(): + p_root = Parameter("root", "root", None, None, None) + p_child = Parameter("child", "task", "@from(root)", 0, "child description") + p_root.add_child(p_child) + assert p_root.default == p_child.default + assert p_root.description == p_child.description + p_root._default = "set default" + assert p_root.default == p_root._default + assert p_root.description == p_child.description + p_root._description = "set desc" + assert p_root.default == p_root._default + assert p_root.description == 
p_root._description + p_root._default = None + assert p_root.default == p_child.default + assert p_root.description == p_root._description + + +def test_parameter_two_children(): + p_root = Parameter("root", "root", None, None, None) + p_child = Parameter("child", "task", "@from(root)", 0, "child1 description") + p_child2 = Parameter("child2", "task2", "@from(root)", 1, "child2 description") + p_root.add_child(p_child) + p_root.add_child(p_child2) + assert p_root.default == (p_child.default, p_child2.default) + assert p_root.description == (p_child.description, p_child2.description) + + +def test_parameter_two_children_same_definition(): + p_root = Parameter("root", "root", None, None, None) + p_child = Parameter("child", "task", "@from(root)", 0, "child description") + p_child2 = Parameter("child2", "task2", "@from(root)", 0, "child description") + p_root.add_child(p_child) + p_root.add_child(p_child2) + assert p_root.default == p_child.default == p_child2.default + assert p_root.description == p_child.description == p_child2.description + + +def test_parameter_children_handle_none(): + p_root = Parameter("root", "root", None, None, None) + p_child = Parameter("child", "task", "@from(root)", 0, "child1 description") + p_child2 = Parameter("child2", "task2", "@from(root)", None, None) + p_root.add_child(p_child) + p_root.add_child(p_child2) + # For parameters, we don't discard None! 
+ assert p_root.default == (p_child.default, p_child2.default) + # For descriptions, we ignore None from child2 + assert p_root.description == p_child.description + p_child3 = Parameter("child", "task", "@from(root)", 2, "child3 description") + p_root.add_child(p_child3) + assert p_root.default == (p_child.default, p_child2.default, p_child3.default) + assert p_root.description == (p_child.description, p_child3.description) + + +def test_get_op_params(fake_ops_dir: str): + resolver = ParameterResolver("", "") + op_spec = OperationParser.parse( + os.path.join(fake_ops_dir, "fake", "simple_parameter.yaml"), {"overwrite": "over"} + ) + params = {p.name: p for p in resolver._get_op_params(op_spec, "task")} + assert len(params) == 2 + assert params["keep"]._value == "kept" + assert params["keep"].default == "kept" + assert params["keep"].description is None + + assert params["overwrite"]._value == "over" + assert params["overwrite"].default == "kept" + assert params["overwrite"].description is None + + +def test_get_op_params_nested(fake_ops_dir: str): + resolver = ParameterResolver("", "") + op_spec = OperationParser.parse( + os.path.join(fake_ops_dir, "fake", "nested_parameters.yaml"), + {"nested": {"overwrite": "over nested"}}, + ) + params = {p.name: p for p in resolver._get_op_params(op_spec, "task")} + assert len(params) == 3 + param = params["overwrite"] + assert param._value == param.default == "kept" + assert param.description == "param named overwrite" + + param = params["nested.overwrite"] + assert param._value == "over nested" + assert param.default == "kept nested" + assert param.description == "nested overwrite" + + +def test_resolve_params(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_nested_params_multiple_default") + wf_spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + resolver = ParameterResolver(fake_workflows_dir, fake_ops_dir) + params = resolver.resolve(wf_spec) 
+ assert len(params) == 2 + param = params["new"] + assert param.default == ("kept", "overwritten") + assert param._value is None + assert len(param.childs) == 2 + assert sorted([p.name for p in param.childs]) == ["new", "overwrite"] + + param = params["new_nested"] + assert param.default == "overwritten nested" + assert param._value is None + assert len(param.childs) == 1 + assert param.description == "nested overwrite" + + +def test_resolve_only_description(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + wf_spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + resolver = ParameterResolver(fake_workflows_dir, fake_ops_dir) + params = resolver.resolve(wf_spec) + param = params["new_nested"] + # We don't get default from child + assert param.default == param._value == "overwritten nested" + # But we do get description + assert param._description is None + assert param.description == "nested overwrite" diff --git a/src/vibe_server/tests/test_remote_workflow_runner.py b/src/vibe_server/tests/test_remote_workflow_runner.py new file mode 100644 index 00000000..37f8d781 --- /dev/null +++ b/src/vibe_server/tests/test_remote_workflow_runner.py @@ -0,0 +1,239 @@ +import os +import sys +import traceback +from asyncio.queues import Queue +from datetime import datetime, timezone +from typing import Any, Optional, Tuple, cast +from unittest.mock import AsyncMock, patch + +import pydantic +import pytest +from shapely.geometry import Polygon, mapping + +from vibe_common.input_handlers import gen_stac_item_from_bounds +from vibe_common.messaging import ( + ErrorContent, + ExecuteReplyContent, + ExecuteRequestContent, + MessageHeader, + MessageType, + OpStatusType, + WorkMessage, + build_work_message, +) +from vibe_common.schemas import CacheInfo, EntryPointDict, OperationSpec +from vibe_core.data import TypeDictVibe +from vibe_core.data.core_types import OpIOType +from vibe_core.data.utils 
import is_vibe_list +from vibe_core.datamodel import TaskDescription +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.runner.remote_runner import ( + MessageRouter, + RemoteWorkflowRunner, + WorkMessageBuilder, +) +from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler +from vibe_server.workflow.workflow import Workflow + +HERE = os.path.dirname(os.path.abspath(__file__)) + + +class FakeMessage(str): + def __init__(self, s: str): + self.parent_id = "" + self.msg = s + + def __str__(self): + return self.msg + + +@pytest.fixture +def time_range() -> Tuple[datetime, datetime]: + return ( + datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), + datetime(year=2021, month=2, day=11, tzinfo=timezone.utc), + ) + + +@pytest.fixture +def input_polygon() -> Polygon: + polygon_coords = [ + (-88.062073563448919, 37.081397673802059), + (-88.026349330507315, 37.085463858128762), + (-88.026349330507315, 37.085463858128762), + (-88.012445388773259, 37.069230099135126), + (-88.035931592028305, 37.048441375086092), + (-88.068120429075847, 37.058833638440767), + (-88.062073563448919, 37.081397673802059), + ] + + return Polygon(polygon_coords) + + +@pytest.fixture +def helloworld_input(input_polygon: Polygon, time_range: Tuple[datetime, datetime]): + return gen_stac_item_from_bounds(mapping(input_polygon), time_range[0], time_range[1]) + + +def test_work_message_builder_fails(workflow_execution_message: WorkMessage): + if hasattr(pydantic, "error_wrappers"): + ValidationError = pydantic.error_wrappers.ValidationError # type: ignore + else: + ValidationError = pydantic.ValidationError # type: ignore + with pytest.raises(ValidationError): + WorkMessageBuilder.build_execute_request( + workflow_execution_message.header.run_id, + "", + None, # type: ignore + {}, + ) + + +def test_work_message_builder_succeeds_with_op_spec(workflow_execution_message: WorkMessage): + message = 
WorkMessageBuilder.build_execute_request( + workflow_execution_message.header.run_id, + "", + OperationSpec( + name="fake", + root_folder="/tmp", + inputs_spec=TypeDictVibe({}), + output_spec=TypeDictVibe({}), + entrypoint=EntryPointDict(file="op.py", callback_builder="whatever"), + description=TaskDescription(), + ), + {}, + ) + assert cast(ExecuteRequestContent, message.content).operation_spec + + +@pytest.mark.anyio +async def test_message_router_put(): + inqueue = Queue() + handler = MessageRouter(inqueue) + item = FakeMessage("some really cool item") + await inqueue.put(item) + assert await handler.get("") == item + + +@pytest.mark.anyio +async def test_message_router_len(): + inqueue = Queue() + handler = MessageRouter(inqueue) + assert len(handler) == 0 + for i in range(10): + await inqueue.put(FakeMessage(f"{i}")) + assert len(handler) == 10 + handler.should_stop = True + + +def build_reply( + parent_header: MessageHeader, op: Optional[OperationSpec] = None, failure: bool = False +) -> WorkMessage: + if op is None: + output = {} + else: + output = { + k: ([{"a": 1}] if is_vibe_list(op.output_spec[k]) else {"a": 1}) for k in op.output_spec + } + if failure: + try: + 1 / 0 # type: ignore + except ZeroDivisionError: + ename, evalue, tb = sys.exc_info() + content = ErrorContent( + status=OpStatusType.failed, + ename=str(ename), # type: ignore + evalue=str(evalue), # type: ignore + traceback=traceback.format_tb(tb), # type: ignore + ) + else: + content = ExecuteReplyContent( + cache_info=CacheInfo("test_op", "1.0", {}, {}), + status=OpStatusType.done, + output=output, # type: ignore + ) + header = MessageHeader( + type=MessageType.error if failure else MessageType.execute_reply, + run_id=parent_header.run_id, + parent_id=parent_header.id, + ) + return build_work_message(header=header, content=content) + + +async def workflow_callback(change, **kwargs): # type: ignore + print(change, kwargs) # type: ignore + + 
+@patch("vibe_server.workflow.runner.remote_runner.send_async") +@pytest.mark.anyio +async def test_remote_workflow_runner_runs( + send_async: AsyncMock, + fake_ops_dir: str, + fake_workflows_dir: str, + helloworld_input: OpIOType, + workflow_execution_message: WorkMessage, +): + inqueue: "Queue[WorkMessage]" = Queue() + handler = MessageRouter(inqueue) + workflow = Workflow.build(get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir) + io_mapper = WorkflowIOHandler(workflow) + runner = RemoteWorkflowRunner( + handler, + workflow, + workflow_execution_message.id, + pubsubname="", + source="", + topic="", + io_mapper=io_mapper, + update_state_callback=workflow_callback, + ) + + async def patched_send(item: WorkMessage, *args: Any) -> None: + reply = build_reply( + parent_header=item.header, op=cast(ExecuteRequestContent, item.content).operation_spec + ) + await inqueue.put(reply) + + send_async.side_effect = patched_send + + await runner.run( + {k: helloworld_input for k in runner.workflow.inputs_spec}, + workflow_execution_message.header.run_id, + ) + + +@patch("vibe_server.workflow.runner.remote_runner.send_async") +@pytest.mark.anyio +async def test_remote_workflow_runner_fails( + send_async: AsyncMock, + fake_ops_dir: str, + fake_workflows_dir: str, + helloworld_input: OpIOType, + workflow_execution_message: WorkMessage, +): + inqueue: "Queue[WorkMessage]" = Queue() + handler = MessageRouter(inqueue) + workflow = Workflow.build(get_fake_workflow_path("str_input"), fake_ops_dir, fake_workflows_dir) + io_mapper = WorkflowIOHandler(workflow) + runner = RemoteWorkflowRunner( + handler, + workflow, + workflow_execution_message.id, + pubsubname="", + source="", + topic="", + io_mapper=io_mapper, + update_state_callback=workflow_callback, + ) + + async def patched_send(item: WorkMessage, *args: Any) -> None: + reply = build_reply(item.header, None, True) + await inqueue.put(reply) + + send_async.side_effect = patched_send + + with 
pytest.raises(RuntimeError): + await runner.run( + {k: helloworld_input for k in runner.workflow.inputs_spec}, + workflow_execution_message.header.run_id, + ) diff --git a/src/vibe_server/tests/test_workflow.py b/src/vibe_server/tests/test_workflow.py new file mode 100644 index 00000000..544c0157 --- /dev/null +++ b/src/vibe_server/tests/test_workflow.py @@ -0,0 +1,304 @@ +import os +from typing import List + +import pytest + +from vibe_core.data.core_types import DataVibe +from vibe_core.data.rasters import Raster +from vibe_core.data.utils import is_vibe_list +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.spec_parser import WorkflowParser, WorkflowSpec, WorkflowSpecEdge +from vibe_server.workflow.workflow import EdgeType, Workflow + +HERE = os.path.dirname(os.path.abspath(__file__)) + + +def test_workflow_parameters( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("task_params"), fake_ops_dir, fake_workflows_dir + ) + assert workflow["parameterizable"].parameters["fake_param"] == 3 # type: ignore + assert workflow["parameterizable"].parameters["fake_another_param"] == { # type: ignore + "fake_nested": 2, + "fake_nested_too": 3, + } + + +def test_workflow_nested_parameters( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("nested_task_params"), fake_ops_dir, fake_workflows_dir + ) + assert workflow["parameterizable"].parameters["fake_param"] == 1 # type: ignore + assert workflow["parameterizable"].parameters["fake_another_param"] == { # type: ignore + "fake_nested": 2, + "fake_nested_too": 4, + } + + +def test_workflow_unknown_parameter( + fake_ops_dir: str, + fake_workflows_dir: str, +): + with pytest.raises(ValueError): + Workflow.build( + get_fake_workflow_path("unknown_task_params"), fake_ops_dir, fake_workflows_dir + ) + + +def test_misconfigured_workflow( + fake_ops_dir: str, + 
fake_workflows_dir: str, +): + with pytest.raises(ValueError): + Workflow.build(get_fake_workflow_path("missing_edge"), fake_ops_dir, fake_workflows_dir) + + +def test_fan_out_fan_in( + fake_ops_dir: str, + fake_workflows_dir: str, +): + # Tests whether we support workflows with nodes + # from List[DataVibe] <-> [DataVibe] + Workflow.build(get_fake_workflow_path("fan_out_and_in"), fake_ops_dir, fake_workflows_dir) + + +def test_nested_fan_out_fails( + fake_ops_dir: str, + fake_workflows_dir: str, +): + with pytest.raises(ValueError): + Workflow.build(get_fake_workflow_path("nested_fan_out"), fake_ops_dir, fake_workflows_dir) + + +@pytest.mark.parametrize( + "workflow_name", + ["single_and_parallel", "gather_and_parallel", "gather_and_parallel_input_gather_output"], +) +def test_parallelism_two_edge_types( + workflow_name: str, + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow_path = get_fake_workflow_path(workflow_name) + + workflow_spec: WorkflowSpec = WorkflowParser.parse( + workflow_path, fake_ops_dir, fake_workflows_dir + ) + workflow = Workflow(workflow_spec) + edge = workflow.edges_from(workflow.index["two_types"])[0] + correct_type = EdgeType.gather if "gather_output" in workflow_name else EdgeType.parallel + assert edge[-1].type == correct_type + + +def test_gather_not_parallel( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("item_gather"), fake_ops_dir, fake_workflows_dir + ) + assert workflow.edges_from(workflow.index["item"])[0][-1].type == EdgeType.gather + + +def test_loading_inheritance_works( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("inheritance"), fake_ops_dir, fake_workflows_dir + ) + assert not is_vibe_list(workflow["inherit_item"].output_spec["processed_data"]) + assert is_vibe_list(workflow["inherit_list"].output_spec["processed_data"]) + + +def test_loading_missing_inheritance_fails( + fake_ops_dir: str, + 
fake_workflows_dir: str, +): + with pytest.raises(ValueError): + Workflow.build( + get_fake_workflow_path("missing_inheritance"), fake_ops_dir, fake_workflows_dir + ) + + +def test_loading_multi_level_inheritance_works( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("two_level_inheritance"), fake_ops_dir, fake_workflows_dir + ) + assert workflow["direct_inherit"].output_spec["processed_data"] is DataVibe + assert workflow["indirect_inherit"].output_spec["processed_data"] is DataVibe + + +def test_inheritance_before_fanout( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("inheritance_before_fan_out"), fake_ops_dir, fake_workflows_dir + ) + + assert workflow["inherit_list"].output_spec["processed_data"] is List[DataVibe] + assert list(workflow.edges_from(workflow.index["inherit_list"]))[0][-1].type == EdgeType.scatter + + +def test_inheritance_after_fanout( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("inheritance_after_fan_out"), fake_ops_dir, fake_workflows_dir + ) + + assert workflow["scatter_inherit"].output_spec["processed_data"] is DataVibe + assert list(workflow.edges_from(workflow.index["list"]))[0][-1].type == EdgeType.scatter + assert ( + list(workflow.edges_from(workflow.index["scatter_inherit"]))[0][-1].type + == EdgeType.parallel + ) + + +def test_inheritance_source( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow = Workflow.build( + get_fake_workflow_path("inheritance_from_source"), fake_ops_dir, fake_workflows_dir + ) + + assert workflow["inherit_raster"].output_spec["processed_data"] is Raster + assert workflow["inherit_source"].output_spec["processed_data"] is DataVibe + + +def test_cycle_disconnected_components_detection( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow_path = get_fake_workflow_path("three_ops") + + workflow_spec: WorkflowSpec = 
WorkflowParser.parse( + workflow_path, fake_ops_dir, fake_workflows_dir + ) + for origin, destination in zip( + ("second.processed_data", "third.processed_data", "third.processed_data"), + ("first.user_data", "second.user_data", "third.user_data"), + ): + edge: WorkflowSpecEdge = WorkflowSpecEdge(origin=origin, destination=[destination]) + workflow_spec.edges.append(edge) + + with pytest.raises(ValueError): + Workflow(workflow_spec) + + workflow_spec.edges.pop() + + +def test_parameter_resolution( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow_path = get_fake_workflow_path("resolve_params") + + workflow = Workflow.build(workflow_path, fake_ops_dir, fake_workflows_dir) + assert workflow["simple"].parameters["keep"] == "kept" + assert workflow["simple"].parameters["overwrite"] == "overwritten" + assert workflow["nested"].parameters["overwrite"] == "overwritten" + assert workflow["nested"].parameters["nested"]["keep"] == "kept nested" + assert workflow["nested"].parameters["nested"]["overwrite"] == "overwritten nested" + + +def test_nested_workflow_parameter_resolution( + fake_ops_dir: str, + fake_workflows_dir: str, +): + workflow_path = get_fake_workflow_path("resolve_nested_params") + + workflow = Workflow.build(workflow_path, fake_ops_dir, fake_workflows_dir) + assert workflow["simple"].parameters["keep"] == "kept" + assert workflow["simple"].parameters["overwrite"] == "overwritten" + assert workflow["nested.simple"].parameters["overwrite"] == "overwritten" + assert workflow["nested.nested"].parameters["overwrite"] == "overwritten" + assert workflow["nested.nested"].parameters["nested"]["keep"] == "kept nested" + assert workflow["nested.nested"].parameters["nested"]["overwrite"] == "overwritten nested" + + +def test_workflow_parameter_resolution_default_values(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("resolve_nested_params_default") + + workflow = Workflow.build(workflow_path, fake_ops_dir, 
fake_workflows_dir) + assert workflow["simple"].parameters["keep"] == "kept" + # Default value for the op in 'overwrite' is "kept" + assert workflow["simple"].parameters["overwrite"] == "kept" + assert workflow["nested.simple"].parameters["overwrite"] == "overwritten" + assert workflow["nested.nested"].parameters["overwrite"] == "overwritten" + assert workflow["nested.nested"].parameters["nested"]["keep"] == "kept nested" + # Default value for the op in 'overwrite' is kept, + # but default for the workflow containing it is 'overwritten nested' + assert workflow["nested.nested"].parameters["nested"]["overwrite"] == "overwritten nested" + + +@pytest.mark.parametrize("invalid", ["", "inexistent"]) +def test_workflow_parameter_resolution_invalid_ref( + fake_ops_dir: str, fake_workflows_dir: str, invalid: str +): + workflow_path = get_fake_workflow_path("resolve_params") + + spec = WorkflowParser.parse(workflow_path, fake_ops_dir, fake_workflows_dir) + spec.tasks["simple"].parameters["overwrite"] = f"@from({invalid})" + with pytest.raises(ValueError): + Workflow(spec) + + +def test_workflow_missing_source(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("bad_source") + with pytest.raises(ValueError): + Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) + + +def test_workflow_missing_sink(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("bad_sink") + with pytest.raises(ValueError): + Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) + + +def test_most_specific_source_type(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("specific_source") + wf = Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) + assert wf.inputs_spec["input"] is Raster + + +def test_item_list_source_type(fake_ops_dir: str, fake_workflows_dir: 
str): + workflow_path = get_fake_workflow_path("specific_source_item_list") + wf = Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) + assert wf.inputs_spec["input"] is Raster + + +def test_list_list_source_type(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("specific_source_list_list") + wf = Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) + assert wf.inputs_spec["input"] is List[Raster] + + +def test_incompatible_sources_fails(fake_ops_dir: str, fake_workflows_dir: str): + workflow_path = get_fake_workflow_path("incompatible_source") + with pytest.raises(ValueError): + Workflow.build( + workflow_path, ops_base_dir=fake_ops_dir, workflow_base_dir=fake_workflows_dir + ) diff --git a/src/vibe_server/tests/test_workflow_input_handler.py b/src/vibe_server/tests/test_workflow_input_handler.py new file mode 100644 index 00000000..c7f0e071 --- /dev/null +++ b/src/vibe_server/tests/test_workflow_input_handler.py @@ -0,0 +1,251 @@ +import copy +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, cast +from unittest.mock import MagicMock, patch + +import pytest +from shapely import geometry as shpg + +from vibe_common.input_handlers import gen_stac_item_from_bounds +from vibe_core.data.core_types import BaseVibe, DataVibe, OpIOType +from vibe_core.data.rasters import CategoricalRaster, Raster +from vibe_core.data.utils import StacConverter, serialize_stac +from vibe_core.datamodel import SpatioTemporalJson +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.input_handler import ( + build_args_for_workflow, + patch_workflow_sources, + validate_workflow_input, +) +from vibe_server.workflow.spec_parser import WorkflowParser +from vibe_server.workflow.workflow import Workflow + + +@pytest.fixture +def dummy_input(): + return {"dummy": 0, 
"another": "1"} + + +def test_build_workflow_args_spatiotemporal_input(): + geom = shpg.box(0, 0, 1, 1) + geojson = {"type": "Feature", "geometry": shpg.mapping(geom)} + start_date = datetime(2020, 1, 2) + end_date = datetime(2020, 1, 3) + user_input = SpatioTemporalJson(start_date, end_date, geojson) + args = build_args_for_workflow(user_input, ["one_input"]) + assert args == {"one_input": gen_stac_item_from_bounds(geojson, start_date, end_date)} + with pytest.raises(ValueError): + build_args_for_workflow(user_input, ["1", "2"]) + + +def test_build_workflow_args_single_source(dummy_input: Dict[str, Any]): + args = build_args_for_workflow(dummy_input, ["one_input"]) + assert args == {"one_input": dummy_input} + args = build_args_for_workflow({"one_input": dummy_input}, ["one_input"]) + assert args == {"one_input": dummy_input} + + +def test_build_workflow_args_multi_source(dummy_input: Dict[str, Any]): + inputs = ["1", "2"] + matching_input = {k: dummy_input for k in inputs} + args = build_args_for_workflow(matching_input, inputs) + assert args == matching_input + + +def test_build_workflow_args_missing_key_fails(dummy_input: Dict[str, Any]): + inputs = ["1", "2"] + with pytest.raises(ValueError): + build_args_for_workflow(dummy_input, inputs) + + +def test_build_workflow_args_wrong_key_fails(dummy_input: Dict[str, Any]): + inputs = ["1", "2"] + bad_input = {k: dummy_input for k in ["1", "3"]} + with pytest.raises(ValueError): + build_args_for_workflow(bad_input, inputs) + + +def test_validate_wf_item_input(): + inputs_spec: Any = {"input": DataVibe} + converter = StacConverter() + geom = shpg.box(0, 0, 1, 1) + now = datetime.now() + x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) + serial = serialize_stac(converter.to_stac_item(x)) + validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) + + +def test_validate_wf_input_subtype(): + inputs_spec: Any = {"input": Raster} + converter = StacConverter() + geom = 
shpg.box(0, 0, 1, 1) + now = datetime.now() + x = Raster(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[], bands={}) + serial = serialize_stac(converter.to_stac_item(x)) + validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) + + # More specific types are fine + x = CategoricalRaster.clone_from(x, id="2", assets=[], categories=[]) + serial = serialize_stac(converter.to_stac_item(x)) + validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) + + # More generic types are not + x = DataVibe.clone_from(x, id="3", assets=[]) + serial = serialize_stac(converter.to_stac_item(x)) + with pytest.raises(ValueError): + validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) + + +def test_validate_wf_list_input(): + inputs_spec: Any = {"input": List[DataVibe]} + converter = StacConverter() + geom = shpg.box(0, 0, 1, 1) + now = datetime.now() + x = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) + serial = serialize_stac(converter.to_stac_item(x)) + validate_workflow_input(cast(OpIOType, {"input": [serial]}), inputs_spec) + # Item is ok as well (will be converted to one item list) + validate_workflow_input(cast(OpIOType, {"input": serial}), inputs_spec) + + +def test_validate_wf_base_input(): + @dataclass + class A(BaseVibe): + a: int + + inputs_spec: Any = {"input": List[A]} + input = serialize_stac(StacConverter().to_stac_item(A(a=1))) + other_input = copy.deepcopy(input) + del other_input["properties"]["a"] + other_input["properties"]["b"] = 1 + + validate_workflow_input({"input": input}, inputs_spec) + validate_workflow_input({"input": [input]}, inputs_spec) + + with pytest.raises(ValueError): + validate_workflow_input({"input": other_input}, inputs_spec) + + with pytest.raises(ValueError): + validate_workflow_input({"input": [other_input]}, inputs_spec) + + inputs_spec: Any = {"input": A} + validate_workflow_input({"input": input}, inputs_spec) + + +def 
test_validate_wf_multi_source_input(): + inputs_spec: Any = {"input1": DataVibe, "input2": Raster} + converter = StacConverter() + geom = shpg.box(0, 0, 1, 1) + now = datetime.now() + x1 = DataVibe(id="1", time_range=(now, now), geometry=shpg.mapping(geom), assets=[]) + s1 = serialize_stac(converter.to_stac_item(x1)) + x2 = Raster.clone_from(x1, id="1", assets=[], bands={}) + s2 = serialize_stac(converter.to_stac_item(x2)) + x3 = CategoricalRaster.clone_from(x2, id="1", assets=[], categories=[]) + s3 = serialize_stac(converter.to_stac_item(x3)) + + validate_workflow_input({"input1": s1, "input2": s2}, inputs_spec) + validate_workflow_input({"input1": s1, "input2": s3}, inputs_spec) + validate_workflow_input({"input1": s3, "input2": s2}, inputs_spec) + + with pytest.raises(ValueError): + validate_workflow_input({"input1": s1, "input2": s1}, inputs_spec) + + +def test_workflow_source_patch(fake_ops_dir: str, fake_workflows_dir: str): + workflow = Workflow.build(get_fake_workflow_path("item_item"), fake_ops_dir, fake_workflows_dir) + assert workflow.inputs_spec == {"input": DataVibe} + assert len(workflow.nodes) == 1 + assert len(workflow.edges) == 0 + old_source = workflow.source_mappings["input"][0] + patch_workflow_sources({"input": []}, workflow) + # We support list in the input + assert workflow.inputs_spec == {"input": List[DataVibe]} + # We add one fan-out node + assert len(workflow.nodes) == 2 + # We add one edge from fan-out node to actual node + assert len(workflow.edges) == 1 + # Our new edge should be from our node to the former source port + edge = workflow.edges_from(workflow.index["input_fanout"])[0] + destination = f"{edge[1].name}.{edge[2][1]}" + assert destination == old_source + + +def test_workflow_source_patch_multiedge(fake_ops_dir: str, fake_workflows_dir: str): + workflow = Workflow.build( + get_fake_workflow_path("specific_source"), fake_ops_dir, fake_workflows_dir + ) + assert workflow.inputs_spec == {"input": Raster} + assert 
len(workflow.nodes) == 2 + assert len(workflow.edges) == 0 + old_sources = [s for s in workflow.source_mappings["input"]] + patch_workflow_sources({"input": []}, workflow) + # We support list in the input + assert workflow.inputs_spec == {"input": List[Raster]} + # We add one fan-out node + assert len(workflow.nodes) == 3 + # We add one edge from fan-out node to each input port in the source (2) + assert len(workflow.edges) == 2 + # Each new edge should be from our node to a former source port + edges = workflow.edges_from(workflow.index["input_fanout"]) + destinations = [f"{edge[1].name}.{edge[2][1]}" for edge in edges] + assert sorted(destinations) == sorted(old_sources) + + +def test_workflow_source_patch_fails_nested_fanout(fake_ops_dir: str, fake_workflows_dir: str): + workflow = Workflow.build( + get_fake_workflow_path("fan_out_and_in"), fake_ops_dir, fake_workflows_dir + ) + with pytest.raises(ValueError): + patch_workflow_sources({"input": []}, workflow) + + +@patch("vibe_server.workflow.input_handler.fan_out_workflow_source") +def test_workflow_source_patch_list_source( + patch_mock: MagicMock, fake_ops_dir: str, fake_workflows_dir: str +): + workflow = Workflow.build(get_fake_workflow_path("list_list"), fake_ops_dir, fake_workflows_dir) + patch_workflow_sources({"input": []}, workflow) + # Put something that is not a list + patch_workflow_sources({"input": 0}, workflow) # type: ignore + patch_mock.assert_not_called() + + +def test_workflow_multi_source_patch(fake_ops_dir: str, fake_workflows_dir: str): + wf_dict = { + "name": "test", + "sources": { + "input1": ["t1.input"], + "input2": ["t2.input"], + "input3": ["t3.input"], + }, + "sinks": { + "output1": "t1.output", + "output2": "t2.gather", + "output3": "t3.raster", + }, + "tasks": { + "t1": {"workflow": "item_gather"}, + "t2": {"workflow": "fan_out_and_in"}, + "t3": {"workflow": "specific_source"}, + }, + } + spec = WorkflowParser.parse_dict( + wf_dict, ops_dir=fake_ops_dir, 
workflows_dir=fake_workflows_dir + ) + workflow = Workflow(spec) + unpatched_nodes = len(workflow.nodes) + unpatched_edges = len(workflow.edges) + assert workflow.inputs_spec == {"input1": DataVibe, "input2": DataVibe, "input3": Raster} + with pytest.raises(ValueError): + patch_workflow_sources({"input1": [], "input2": [], "input3": []}, workflow) + # We patched the first and last ones, but reverted the second one + assert workflow.inputs_spec == { + "input1": List[DataVibe], + "input2": DataVibe, + "input3": List[Raster], + } + assert len(workflow.nodes) == unpatched_nodes + 2 + # t3 has a source that maps to two ports so it will create two edges + assert len(workflow.edges) == unpatched_edges + 3 diff --git a/src/vibe_server/tests/test_workflow_parser.py b/src/vibe_server/tests/test_workflow_parser.py new file mode 100644 index 00000000..b21e8f46 --- /dev/null +++ b/src/vibe_server/tests/test_workflow_parser.py @@ -0,0 +1,89 @@ +from dataclasses import asdict + +import pytest +import yaml + +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.spec_parser import WorkflowParser + + +@pytest.mark.parametrize("missing_field", WorkflowParser.required_fields) +def test_parser_fails_missing_field(missing_field: str, fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + with open(wf_path) as f: + wf_dict = yaml.safe_load(f) + del wf_dict[missing_field] + with pytest.raises(ValueError): + WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + + +def test_parser_fails_unknown_wf_field(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + with open(wf_path) as f: + wf_dict = yaml.safe_load(f) + wf_dict["unknown"] = "🤦‍♂️" + + with pytest.raises(ValueError): + WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + + +def 
test_parser_fails_unknown_task_field(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + with open(wf_path) as f: + wf_dict = yaml.safe_load(f) + + wf_dict["tasks"]["simple"]["unknown"] = "🤦‍♂" + + with pytest.raises(ValueError): + WorkflowParser.parse_dict(wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + + +def test_parser_fills_optional_fields(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("list_list") + with open(wf_path) as f: + wf_dict = yaml.safe_load(f) + for field in WorkflowParser.optional_fields: + wf_dict[field] = None + spec = WorkflowParser.parse_dict( + wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir + ) + assert getattr(spec, field) is not None + del wf_dict[field] + + +def test_parser_parameter_override(fake_ops_dir: str, fake_workflows_dir: str): + spec = WorkflowParser.parse( + get_fake_workflow_path("resolve_params"), + ops_dir=fake_ops_dir, + workflows_dir=fake_workflows_dir, + parameters_override={"new": "override"}, + ) + assert spec.parameters["new"] == "override" + + +def test_parser_parameter_override_yaml_dict(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + with open(wf_path) as f: + wf_dict = yaml.safe_load(f) + spec = WorkflowParser.parse_dict( + wf_dict, + ops_dir=fake_ops_dir, + workflows_dir=fake_workflows_dir, + parameters_override={"new": "override"}, + ) + assert spec.parameters["new"] == "override" + + +def test_parser_parameter_override_spec_dict(fake_ops_dir: str, fake_workflows_dir: str): + spec = WorkflowParser.parse( + get_fake_workflow_path("resolve_params"), + ops_dir=fake_ops_dir, + workflows_dir=fake_workflows_dir, + ) + spec = WorkflowParser.parse_dict( + asdict(spec), + ops_dir=fake_ops_dir, + workflows_dir=fake_workflows_dir, + parameters_override={"new": "override"}, + ) + assert spec.parameters["new"] == "override" diff --git 
a/src/vibe_server/tests/test_workflow_runner.py b/src/vibe_server/tests/test_workflow_runner.py new file mode 100644 index 00000000..6576cbc1 --- /dev/null +++ b/src/vibe_server/tests/test_workflow_runner.py @@ -0,0 +1,62 @@ +from typing import Any, List +from uuid import UUID, uuid4 + +import pytest + +from vibe_common.messaging import OpIOType +from vibe_core.data.utils import StacConverter, is_container_type, serialize_stac +from vibe_dev.testing.fake_workflows_fixtures import ( # noqa + fake_ops_dir, + fake_workflows_dir, + get_fake_workflow_path, +) +from vibe_dev.testing.workflow_fixtures import THE_DATAVIBE +from vibe_server.workflow.runner.runner import WorkflowRunner +from vibe_server.workflow.runner.task_io_handler import WorkflowIOHandler +from vibe_server.workflow.workflow import GraphNodeType, Workflow + + +class MockWorkflowRunner(WorkflowRunner): + def __init__(self, fail_list: List[str], *args: Any, **kwargs: Any): + self.fail_list = fail_list + super().__init__(*args, **kwargs) + + async def _run_op_impl( + self, op: GraphNodeType, input: OpIOType, run_id: UUID, _: int + ) -> OpIOType: + for fail in self.fail_list: + if op.name.startswith(fail): + raise RuntimeError(f"Failed op {op} because it was in the fail list") + converter = StacConverter() + return { + k: serialize_stac( + converter.to_stac_item( + [THE_DATAVIBE] if is_container_type(v) else THE_DATAVIBE # type: ignore + ) + ) + for k, v in op.spec.output_spec.items() + } + + +@pytest.mark.anyio +async def test_one_failure_in_sink_fails_workflow( + fake_ops_dir: str, # noqa + fake_workflows_dir: str, # noqa +): + workflow = Workflow.build( + get_fake_workflow_path("custom_indices_structure"), + fake_ops_dir, + fake_workflows_dir, + ) + + data = StacConverter().to_stac_item([THE_DATAVIBE]) + wf_input: OpIOType = {"user_input": serialize_stac(data)} + + runner = MockWorkflowRunner( + fail_list=["ndvi"], + workflow=workflow, + io_mapper=WorkflowIOHandler(workflow), + ) + + with 
pytest.raises(RuntimeError): + await runner.run(wf_input, uuid4()) diff --git a/src/vibe_server/tests/test_workflow_spec_validator.py b/src/vibe_server/tests/test_workflow_spec_validator.py new file mode 100644 index 00000000..be54137e --- /dev/null +++ b/src/vibe_server/tests/test_workflow_spec_validator.py @@ -0,0 +1,45 @@ +import pytest + +from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path +from vibe_server.workflow.spec_parser import WorkflowParser +from vibe_server.workflow.spec_validator import WorkflowSpecValidator + + +def test_validator_fails_unused_parameter(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + wf_dict = WorkflowParser._load_workflow(wf_path) + spec = WorkflowParser.parse_dict( + wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir + ) + WorkflowSpecValidator.validate(spec) + # Add unused param + wf_dict["parameters"]["unused"] = None + spec = WorkflowParser.parse_dict( + wf_dict, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir + ) + with pytest.raises(ValueError): + WorkflowSpecValidator._validate_parameter_references(spec) + + +def test_validator_fails_bad_ref(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_params") + wf_dict = WorkflowParser._load_workflow(wf_path) + # Add invalid ref + wf_dict["tasks"]["nested"]["parameters"]["overwrite"] = "@from(unexistent)" + spec = WorkflowParser.parse_dict(wf_dict, fake_ops_dir, workflows_dir=fake_workflows_dir) + with pytest.raises(ValueError): + WorkflowSpecValidator._validate_parameter_references(spec) + + +def test_validator_fails_multiple_defaults(fake_ops_dir: str, fake_workflows_dir: str): + wf_path = get_fake_workflow_path("resolve_nested_params_multiple_default") + spec = WorkflowParser.parse(wf_path, ops_dir=fake_ops_dir, workflows_dir=fake_workflows_dir) + with pytest.raises(ValueError): + WorkflowSpecValidator.validate(spec) + + +def 
async def setup_updater(run_config: Dict[str, Any], tasks: List[str]):
    """Build a ``WorkflowStateUpdate`` for ``run_config["id"]`` and start the workflow.

    ``_init_cache`` is patched so the workflow cache is seeded with default
    ``RunDetails`` whose submission time is ``MOCK_SUBMISSION_TIME``; the updater
    then receives ``WORKFLOW_STARTED`` for ``tasks`` and is returned.
    """
    seeded_details = asdict(RunDetails())
    seeded_details["submission_time"] = MOCK_SUBMISSION_TIME

    def fake_init_cache(self):  # type:ignore
        self.wf_cache["details"] = seeded_details
        self._cache_init = True

    with patch.object(WorkflowStateUpdate, "_init_cache", autospec=True) as patched_init:
        patched_init.side_effect = fake_init_cache
        updater = WorkflowStateUpdate(run_config["id"])
        # The start event is sent while the patch is still active
        await updater(WorkflowChange.WORKFLOW_STARTED, tasks=tasks)
    return updater
@patch("vibe_common.statestore.StateStore.transaction")
@patch("vibe_common.statestore.StateStore.retrieve")
@patch("vibe_common.statestore.StateStore.store")
@pytest.mark.anyio
async def test_workflow_finished(
    store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any]
):
    """Finishing a workflow must commit exactly one operation, keyed by the run id."""
    retrieve.return_value = run_config
    updater = await setup_updater(run_config, ["task1", "task2"])

    await updater(WorkflowChange.WORKFLOW_FINISHED)

    # The first transaction comes from setup_updater, the second from the finish event
    finish_call = transaction.mock_calls[1]
    committed = finish_call.args[0]
    # We only update the workflow
    assert len(committed) == 1
    assert committed[0]["key"] == str(updater.run_id)
@patch("vibe_common.statestore.StateStore.transaction")
@patch("vibe_common.statestore.StateStore.retrieve")
@patch("vibe_common.statestore.StateStore.store")
@pytest.mark.anyio
async def test_propagate_up(
    store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any]
):
    """Walk two tasks through queued/running/finished and check upward propagation.

    After every subtask event the test inspects the last statestore transaction
    (which keys were committed) and the cached status and timestamps of the
    subtask, its task and the workflow.
    """
    retrieve.return_value = run_config
    tasks = ["task1", "task2"]
    updater = await setup_updater(run_config, tasks)
    assert updater._get_cache(None, None)[0]["submission_time"] == MOCK_SUBMISSION_TIME
    assert updater._get_cache(None, None)[0]["start_time"] is not None
    task_start = "task1"
    num_subtasks = 4
    await updater(WorkflowChange.TASK_STARTED, task=task_start, num_subtasks=num_subtasks)
    # Drop the calls recorded so far so that index 0 below is the first queue event
    transaction.reset_mock()
    await updater(WorkflowChange.SUBTASK_QUEUED, task=task_start, subtask_idx=0)
    transaction_ops = transaction.mock_calls[0][1][0]
    # We update the task and workflow to queued
    assert len(transaction_ops) == 2
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}"
    assert transaction_ops[1]["key"] == f"{updater.run_id}"

    assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued
    assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.queued
    assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.queued
    # Check that submission time was properly updated
    subtask_submission = updater._get_cache(task_start, 0)[0]["submission_time"]
    assert subtask_submission is not None
    assert updater._get_cache(task_start, None)[0]["submission_time"] == subtask_submission

    # A different subtask should still be pending
    assert updater._get_cache(task_start, 1)[0]["status"] == RunStatus.pending

    # Let's queue another subtask from the same task
    await updater(WorkflowChange.SUBTASK_QUEUED, task=task_start, subtask_idx=1)
    transaction_ops = transaction.mock_calls[-1][1][0]
    # We only update the task since the workflow is already queued
    assert len(transaction_ops) == 1
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}"
    assert updater._get_cache(task_start, 1)[0]["status"] == RunStatus.queued

    # Let's start the other task and queue a subtask
    other_task = "task2"
    await updater(WorkflowChange.TASK_STARTED, task=other_task, num_subtasks=1)
    transaction_ops = transaction.mock_calls[-1][1][0]
    assert len(transaction_ops) == 1
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}"
    await updater(WorkflowChange.SUBTASK_QUEUED, task=other_task, subtask_idx=0)
    transaction_ops = transaction.mock_calls[-1][1][0]
    assert len(transaction_ops) == 1
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}"

    # Let's start a subtask on the first task
    await updater(WorkflowChange.SUBTASK_RUNNING, task=task_start, subtask_idx=0)
    transaction_ops = transaction.mock_calls[-1][1][0]
    # We should update the task and the workflow to running here
    assert len(transaction_ops) == 2
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}"
    assert transaction_ops[1]["key"] == f"{updater.run_id}"
    assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.running
    assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.running
    assert updater._get_cache(None, None)[0]["status"] == RunStatus.running
    # The start times should match
    subtask_start = updater._get_cache(task_start, 0)[0]["start_time"]
    assert updater._get_cache(task_start, None)[0]["start_time"] == subtask_start

    # Let's finish the first subtask
    await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=0)
    transaction_ops = transaction.mock_calls[-1][1][0]
    # We should update the task and the workflow back to queued
    assert len(transaction_ops) == 2
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{task_start}"
    assert transaction_ops[1]["key"] == f"{updater.run_id}"
    assert updater._get_cache(task_start, 0)[0]["status"] == RunStatus.done
    assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.queued
    assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued
    # The subtask should have an end time, but the task and workflow should not be updated
    assert updater._get_cache(task_start, 0)[0]["end_time"] is not None
    assert updater._get_cache(task_start, None)[0]["end_time"] is None
    assert updater._get_cache(None, None)[0]["end_time"] is None

    # If we start the subtask for the other task, the workflow should be running
    await updater(WorkflowChange.SUBTASK_RUNNING, task=other_task, subtask_idx=0)
    transaction_ops = transaction.mock_calls[-1][1][0]
    assert len(transaction_ops) == 2
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}"
    assert transaction_ops[1]["key"] == f"{updater.run_id}"
    assert updater._get_cache(other_task, 0)[0]["status"] == RunStatus.running
    assert updater._get_cache(other_task, None)[0]["status"] == RunStatus.running
    assert updater._get_cache(None, None)[0]["status"] == RunStatus.running

    # Completing the only subtask should set the task to finished and workflow back to queued
    await updater(WorkflowChange.SUBTASK_FINISHED, task=other_task, subtask_idx=0)
    transaction_ops = transaction.mock_calls[-1][1][0]
    assert len(transaction_ops) == 2
    assert transaction_ops[0]["key"] == f"{updater.run_id}-{other_task}"
    assert transaction_ops[1]["key"] == f"{updater.run_id}"
    assert updater._get_cache(other_task, 0)[0]["status"] == RunStatus.done
    assert updater._get_cache(other_task, None)[0]["status"] == RunStatus.done
    assert updater._get_cache(None, None)[0]["status"] == RunStatus.queued
    # The task should have an end time, but the workflow should not be updated
    subtask_end = updater._get_cache(other_task, 0)[0]["end_time"]
    assert subtask_end is not None
    assert updater._get_cache(other_task, None)[0]["end_time"] == subtask_end
    assert updater._get_cache(None, None)[0]["end_time"] is None

    # Complete all subtasks for the first task
    # (subtask 0 is already done, so its event is expected to leave it done)
    for subtask_idx in range(num_subtasks):
        await updater(WorkflowChange.SUBTASK_FINISHED, task=task_start, subtask_idx=subtask_idx)
        assert updater._get_cache(task_start, subtask_idx)[0]["status"] == RunStatus.done
    # The task should be finished and the workflow should NOT
    assert updater._get_cache(task_start, None)[0]["status"] == RunStatus.done
    assert updater._get_cache(None, None)[0]["status"] != RunStatus.done
    # Check end time for the task
    subtask_end = updater._get_cache(task_start, 3)[0]["end_time"]
    assert subtask_end is not None
    assert updater._get_cache(task_start, None)[0]["end_time"] == subtask_end
    assert updater._get_cache(None, None)[0]["end_time"] is None
@patch("vibe_common.statestore.StateStore.transaction")
@patch("vibe_common.statestore.StateStore.retrieve")
@patch("vibe_common.statestore.StateStore.store")
@pytest.mark.anyio
async def test_workflow_fail(
    store: Mock, retrieve: Mock, transaction: Mock, run_config: Dict[str, Any]
):
    """A workflow failure keeps finished tasks as done and cancels the unfinished ones."""
    retrieve.return_value = run_config
    tasks = ["task1", "task2"]
    finished_task, untouched_task = tasks
    updater = await setup_updater(run_config, tasks)

    # Drive the first task to completion through its single subtask
    await updater(WorkflowChange.WORKFLOW_STARTED, tasks=tasks)
    await updater(WorkflowChange.TASK_STARTED, task=finished_task, num_subtasks=1)
    await updater(WorkflowChange.SUBTASK_FINISHED, task=finished_task, subtask_idx=0)
    transaction.reset_mock()

    failure_reason = "Something went wrong 💀"
    await updater(WorkflowChange.WORKFLOW_FAILED, reason=failure_reason)

    committed = transaction.mock_calls[0].args[0]
    # We update the workflow and one task
    assert [op["key"] for op in committed] == [
        f"{updater.run_id}-{untouched_task}",
        str(updater.run_id),
    ]

    workflow_state = updater._get_cache(None, None)[0]
    assert workflow_state["status"] == RunStatus.failed
    # We should have the reason of failure here
    assert workflow_state["reason"] == failure_reason

    # The first task should be done
    assert updater._get_cache(finished_task, None)[0]["status"] == RunStatus.done
    assert updater._get_cache(finished_task, 0)[0]["status"] == RunStatus.done

    # The second task should be cancelled
    cancelled_state = updater._get_cache(untouched_task, None)[0]
    assert cancelled_state["status"] == RunStatus.cancelled
    # We should have the cancellation reason for workflow failure here
    assert cancelled_state["reason"] == updater.workflow_failure_reason
updater = await setup_updater(run_config, tasks) + # Task with several subtasks + await updater(WorkflowChange.TASK_STARTED, task=tasks[0], num_subtasks=3) + # Task with a single subtask + await updater(WorkflowChange.TASK_STARTED, task=tasks[1], num_subtasks=1) + # Task with no subtasks + # Task with single subtask that's done + await updater(WorkflowChange.TASK_STARTED, task=tasks[3], num_subtasks=1) + await updater(WorkflowChange.SUBTASK_FINISHED, task=tasks[3], subtask_idx=0) + # First task has a subtask that is done, and one that is not + await updater(WorkflowChange.SUBTASK_FINISHED, task=tasks[0], subtask_idx=0) + await updater(WorkflowChange.SUBTASK_QUEUED, task=tasks[0], subtask_idx=1) + transaction.reset_mock() + # The last subtask fails + failure_reason = "Something went wrong 💀" + await updater( + WorkflowChange.SUBTASK_FAILED, task=tasks[0], subtask_idx=2, reason=failure_reason + ) + transaction_ops = transaction.mock_calls[0][1][0] + expected_cancel_reason = f"Cancelled because task '{tasks[0]}' (subtask 2) failed" + # We update the workflow and three tasks + assert len(transaction_ops) == 4 + for t_op, task in zip(transaction_ops, tasks[:-1]): + assert t_op["key"] == f"{updater.run_id}-{task}" + assert transaction_ops[-1]["key"] == str(updater.run_id) + # Workflow is marked as failed + assert updater._get_cache(None, None)[0]["status"] == RunStatus.failed + # We should have the reason of failure here + assert updater._get_cache(None, None)[0]["reason"] == failure_reason + # The first task should be failed + assert updater._get_cache(tasks[0], None)[0]["status"] == RunStatus.failed + assert updater._get_cache(tasks[0], None)[0]["reason"] == failure_reason + # Last subtask should be failed + assert updater._get_cache(tasks[0], 2)[0]["status"] == RunStatus.failed + assert updater._get_cache(tasks[0], 2)[0]["reason"] == failure_reason + # The first subtask should be done still + assert updater._get_cache(tasks[0], 0)[0]["status"] == RunStatus.done + # 
@patch.object(WorkflowStateUpdate, "commit_cache_for")
@pytest.mark.anyio
async def test_workflow_state_update_subtasks(commit: Mock, run_config: Dict[str, Any]):
    """Check per-subtask status transitions and the resulting status histogram.

    ``commit_cache_for`` is patched out, so only the in-memory cache is exercised.
    After each event the test checks the status of the touched subtask, that the
    cached dict still builds a ``RunDetails``, and how many subtasks sit in each
    of pending/queued/running/done.
    """
    op_name = "fake-op"
    updater = await setup_updater(run_config, [op_name])
    await updater(WorkflowChange.TASK_STARTED, task=op_name, num_subtasks=3)
    subtasks = updater.task_cache[op_name]["subtasks"]
    assert len(subtasks) == 3
    assert all(r["status"] == RunStatus.pending for r in subtasks)
    # Building RunDetails from the cached dict is expected to raise if the dict is malformed
    RunDetails(**subtasks[0])

    def compare(values: Tuple[int, int, int, int]) -> bool:
        """Return True when the (pending, queued, running, done) counts equal ``values``."""
        counts = Counter(r["status"] for r in subtasks)
        tracked = (RunStatus.pending, RunStatus.queued, RunStatus.running, RunStatus.done)
        return all(counts[status] == expected for status, expected in zip(tracked, values))

    # The result of every compare() call is asserted: a bare call would discard
    # the boolean and the histogram would never actually be verified.
    await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=0)
    assert subtasks[0]["status"] == RunStatus.queued
    RunDetails(**subtasks[0])
    assert compare((2, 1, 0, 0))
    await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=2)
    assert subtasks[2]["status"] == RunStatus.queued
    assert compare((1, 2, 0, 0))
    await updater(WorkflowChange.SUBTASK_RUNNING, task=op_name, subtask_idx=0)
    assert subtasks[0]["status"] == RunStatus.running
    RunDetails(**subtasks[0])
    assert compare((1, 1, 1, 0))
    await updater(WorkflowChange.SUBTASK_RUNNING, task=op_name, subtask_idx=2)
    assert subtasks[2]["status"] == RunStatus.running
    assert compare((1, 0, 2, 0))
    await updater(WorkflowChange.SUBTASK_FINISHED, task=op_name, subtask_idx=2)
    assert subtasks[2]["status"] == RunStatus.done
    RunDetails(**subtasks[2])
    assert compare((1, 0, 1, 1))
    await updater(WorkflowChange.SUBTASK_QUEUED, task=op_name, subtask_idx=1)
    assert subtasks[1]["status"] == RunStatus.queued
    assert compare((0, 1, 1, 1))
class BlobHrefHandler(HrefHandler):
    """Href handler that replaces each asset href with the URL signed by a blob token manager.

    The manager is built from ``connection_string`` when one is given (it takes
    precedence over ``credential``); otherwise a credential-based manager is used.
    """

    def __init__(
        self, credential: Optional[TokenCredential] = None, connection_string: Optional[str] = None
    ):
        super().__init__()
        if connection_string is None:
            self.manager = BlobTokenManagerCredentialed(credential=credential)
        else:
            self.manager = BlobTokenManagerConnectionString(connection_string=connection_string)

    def _update_asset(self, asset: Asset):
        """Overwrite ``asset.href`` in place with its signed counterpart."""
        signed_href = self.manager.sign_url(asset.href)
        asset.href = signed_href
+ extract_message_header_from_event, + run_id_from_traceparent, +) +from vibe_common.statestore import StateStore, TransactionOperation +from vibe_common.telemetry import add_trace, setup_telemetry, update_telemetry_context +from vibe_core.datamodel import RunConfig, RunDetails, RunStatus +from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging + +from .workflow import workflow_from_input +from .workflow.input_handler import build_args_for_workflow, patch_workflow_sources +from .workflow.runner.remote_runner import MessageRouter, RemoteWorkflowRunner +from .workflow.runner.runner import WorkflowCallback, WorkflowChange, WorkflowRunner +from .workflow.runner.task_io_handler import WorkflowIOHandler +from .workflow.spec_parser import WorkflowParser +from .workflow.workflow import Workflow, get_workflow_dir + +Updates = Tuple[bool, List[str]] + + +class WorkflowStateUpdate(WorkflowCallback): + """Keeps track of the state of a workflow and its tasks. + The state is stored in the statestore and updated based on the events received + from the workflow runner. The workflow and task states are updated in the statestore with + different keys in order to avoid upserting a large amount of data with every update. + + In general, the state of a task is defined based on the status of its subtasks. + A task is marked as a status when at least one of its subtasks is marked as that status + in the following order of priority: + 1. failed. + 2. running. + 3. queued. + 4. pending. + 5. done. + Whenever an update to a subtask happens, we propagate it up and update statuses as necessary. + The analogous is defined for workflow w.r.t tasks. + + Cancellation and failure events are also propagated down. + This means that when a workflow is cancelled, all tasks are updated and cancelled as well + (unless already done). The analogous happens for tasks and subtasks. + For failures, we propagate the cancelled state down and the failed state up.
+ """ + + user_request_reason = "Cancellation requested by user" + workflow_failure_reason = "Cancelled due to failure during workflow execution" + + def __init__(self, workflowRunId: UUID): + self.run_id = workflowRunId + self.wf_cache: Dict[str, Any] = {} + self.task_cache: Dict[str, Any] = {} + self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") + self.statestore = StateStore() + self.update_lock = asyncio.Lock() + # Cache "empty" RunDetails because creating it triggers the big bad bug + self.pending_run = asdict(RunDetails()) + self.wf_change_to_update = { + WorkflowChange.WORKFLOW_STARTED: self.create_workflow, + WorkflowChange.WORKFLOW_FINISHED: self.complete_workflow, + WorkflowChange.WORKFLOW_CANCELLED: self.cancel_workflow, + WorkflowChange.WORKFLOW_FAILED: self.fail_workflow, + WorkflowChange.TASK_STARTED: self.create_subtasks, + WorkflowChange.SUBTASK_QUEUED: self.queue_subtask, + WorkflowChange.SUBTASK_RUNNING: self.execute_subtask, + WorkflowChange.SUBTASK_FINISHED: self.complete_subtask, + WorkflowChange.SUBTASK_FAILED: self.fail_subtask, + WorkflowChange.SUBTASK_PENDING: self.pend_subtask, + } + self._cache_init = False + + async def _init_cache(self): + # TODO: We could also load task cache here in case we want to resume a workflow + cache = await self.statestore.retrieve(str(self.run_id)) + self.wf_cache["details"] = cache["details"] + self._cache_init = True + + def create_workflow(self, tasks: List[str]) -> Updates: + # Workflow start time is set when we start running the graph + self.wf_cache["details"]["start_time"] = datetime.now() + self.wf_cache["tasks"] = tasks + for t in tasks: + self.task_cache[t] = copy(self.pending_run) + return True, tasks + + def complete_workflow(self) -> Updates: + return self._update_finish_change(None, None, cancelled=False, reason=""), [] + + def cancel_workflow(self) -> Updates: + fun = partial(self._update_finish_change, cancelled=True, reason=self.user_request_reason) + return 
self._propagate_down(fun) + + def fail_workflow(self, reason: str) -> Updates: + wf_updated = self._update_failure_change(None, None, reason=reason) + if not wf_updated: + # We won't cancel the workflow because it is already finished + return False, [] + fun = partial( + self._update_finish_change, + cancelled=True, + reason=self.workflow_failure_reason, + ) + _, updated_tasks = self._propagate_down(fun) + return wf_updated, updated_tasks + + def create_subtasks(self, task: str, num_subtasks: int) -> Updates: + cache, name = self._get_cache(task, None) + cache["subtasks"] = [copy(self.pending_run) for _ in range(num_subtasks)] + self.logger.info(f"Created {num_subtasks} subtasks for {name}. (run id: {self.run_id})") + return False, [task] + + def queue_subtask(self, task: str, subtask_idx: int) -> Updates: + return self._propagate_up(self._update_queued_change, task, subtask_idx) + + def execute_subtask(self, task: str, subtask_idx: int) -> Updates: + return self._propagate_up(self._update_start_change, task, subtask_idx) + + def complete_subtask(self, task: str, subtask_idx: int) -> Updates: + fun = partial(self._update_finish_change, cancelled=False, reason="") + return self._propagate_up(fun, task, subtask_idx) + + def fail_subtask(self, task: str, subtask_idx: int, reason: str) -> Updates: + fail_fun = partial(self._update_failure_change, reason=reason) + subtask_updated = fail_fun(task, subtask_idx, reason=reason) + task_updated = fail_fun(task, None, reason=reason) + wf_updated_up = fail_fun(None, None, reason=reason) + updated_tasks_up = [task] if (task_updated or subtask_updated) else [] + cancel_fun = partial( + self._update_finish_change, + cancelled=True, + reason=f"Cancelled because task '{task}' (subtask {subtask_idx}) failed", + ) + wf_updated_down, updated_tasks_down = self._propagate_down(cancel_fun) + wf_updated = wf_updated_up or wf_updated_down + updated_tasks = updated_tasks_up + [ + i for i in updated_tasks_down if i not in updated_tasks_up + ] 
+ return wf_updated, updated_tasks + + def pend_subtask(self, task: str, subtask_idx: int) -> Updates: + return self._propagate_up(self._update_pending_change, task, subtask_idx) + + def _combine_children_status(self, children_status: Set[RunStatus]) -> RunStatus: + for status in (RunStatus.running, RunStatus.queued, RunStatus.pending): + if status in children_status: + new_status = status + break + else: + if children_status != {RunStatus.done}: + raise ValueError(f"Unknown status combination: {children_status}") + new_status = RunStatus.done + return new_status + + def _combine_children_time( + self, children_start: List[Optional[datetime]], children_end: List[Optional[datetime]] + ) -> Tuple[Optional[datetime], Optional[datetime]]: + children_start = [i for i in children_start if i is not None] + if not children_start: + start_time = None + else: + start_time = min(cast(List[datetime], children_start)) + if any(i is None for i in children_end): + end_time = None + else: + end_time = max(cast(List[datetime], children_end)) + return start_time, end_time + + def _update_task_status(self, task: str) -> bool: + cache, _ = self._get_cache(task, None) + if cache["subtasks"] is None: + raise RuntimeError(f"Tried to update status of task {task} before creating subtasks") + subtask_status = {i["status"] for i in cache["subtasks"]} + new_status = self._combine_children_status(subtask_status) + if new_status != cache["status"]: + cache["status"] = new_status + cache["submission_time"], _ = self._combine_children_time( + [i["submission_time"] for i in cache["subtasks"]], + [None], + ) + cache["start_time"], cache["end_time"] = self._combine_children_time( + [i["start_time"] for i in cache["subtasks"]], + [i["end_time"] for i in cache["subtasks"]], + ) + return True + return False + + def _update_workflow_status(self) -> bool: + cache, _ = self._get_cache(None, None) + task_status = {i["status"] for i in self.task_cache.values()} + new_status = 
self._combine_children_status(task_status) + if new_status == RunStatus.done: + # We don't set it to done here because we still need to store the output + # We only set to done when complete_workflow is called by the orchestrator + new_status = RunStatus.running + if new_status != cache["status"]: + cache["status"] = new_status + return True + return False + + def _propagate_up( + self, fun: Callable[[Optional[str], Optional[int]], bool], task: str, subtask_idx: int + ) -> Updates: + subtask_updated = fun(task, subtask_idx) + if not subtask_updated: + return False, [] + task_updated = self._update_task_status(task) + if not task_updated: + return False, [task] + return self._update_workflow_status(), [task] + + def _propagate_down(self, fun: Callable[[Optional[str], Optional[int]], bool]) -> Updates: + wf_updated = fun(None, None) + updated_tasks = [] + for task, task_cache in self.task_cache.items(): + task_updated = fun(task, None) + if task_cache["subtasks"] is not None: + subtask_updated = any([fun(task, i) for i in range(len(task_cache["subtasks"]))]) + else: + subtask_updated = False + if task_updated or subtask_updated: + updated_tasks.append(task) + return wf_updated, updated_tasks + + def _get_cache( + self, task: Optional[str], subtask_idx: Optional[int] + ) -> Tuple[Dict[str, Any], str]: + if task is None: + return self.wf_cache["details"], "workflow" + if subtask_idx is None: + return self.task_cache[task], f"task {task}" + subtasks_cache = self.task_cache[task]["subtasks"] + if subtasks_cache is None: + raise ValueError( + f"Tried to update subtask {subtask_idx} for {task} before creating subtasks" + ) + return ( + self.task_cache[task]["subtasks"][subtask_idx], + f"task {task} (subtask {subtask_idx})", + ) + + def _update_pending_change(self, task: Optional[str], subtask_idx: Optional[int]) -> bool: + cache, name = self._get_cache(task, subtask_idx) + if RunStatus.finished(cache["status"]): + return False + cache["status"] = RunStatus.pending + 
def _update_start_change(self, task: Optional[str], subtask_idx: Optional[int]) -> bool:
    """Mark the addressed workflow, task or subtask entry as running.

    Entries that are already running or already finished are left untouched.
    The start time is recorded only when none was set before.

    Returns:
        True if the entry was changed, False otherwise.
    """
    entry, label = self._get_cache(task, subtask_idx)
    current = entry["status"]
    changed = not (RunStatus.finished(current) or current == RunStatus.running)
    if changed:
        if entry["start_time"] is None:
            entry["start_time"] = datetime.now()
        entry["status"] = RunStatus.running
        self.logger.info(f"Changed {label} status to {RunStatus.running}. (run id: {self.run_id})")
    return changed
def _update_failure_change(
    self, task: Optional[str], subtask_idx: Optional[int], reason: str
) -> bool:
    """Mark the addressed workflow, task or subtask entry as failed.

    Entries that are already finished are left untouched. A missing start time
    is logged as an error and filled in with the current time.

    Args:
        task: Task name, or ``None`` for the workflow itself.
        subtask_idx: Subtask index, or ``None`` for the task itself.
        reason: Failure description stored in the entry.

    Returns:
        True if the entry was changed, False otherwise.
    """
    entry, label = self._get_cache(task, subtask_idx)
    if RunStatus.finished(entry["status"]):
        return False
    never_started = entry["start_time"] is None
    if never_started:
        self.logger.error(
            f"Marking {label} as failed, "
            f"but it didn't have a start time set. (run id: {self.run_id})"
        )
        entry["start_time"] = datetime.now()
    entry.update(end_time=datetime.now(), status=RunStatus.failed, reason=reason)
    self.logger.info(f"Changed {label} status to {RunStatus.failed}. (run id: {self.run_id})")
    return True
Locking + # here serializes status updates and guarantees we won't overwrite + # previously-written updates + if not self._cache_init: + await self._init_cache() + update_workflow, tasks_to_update = self.update_cache_for(change, **kwargs) + if update_workflow or tasks_to_update: + await self.commit_cache_for(update_workflow, tasks_to_update) + + +class WorkflowRunManager: + inqueues: Dict[str, "asyncio.queues.Queue[WorkMessage]"] + runner: Optional[WorkflowRunner] + + def __init__( + self, + inqueues: Dict[str, "asyncio.queues.Queue[WorkMessage]"], + message: WorkflowExecutionMessage, + pubsubname: str, + source: str, + topic: str, + ops_dir: str = DEFAULT_OPS_DIR, + workflows_dir: str = get_workflow_dir(), + *args: Any, + **kwargs: Dict[str, Any], + ): + self.message = message + self.inqueues = inqueues + self.statestore = StateStore() + self.runner = None + self.name = str(message.run_id) + self.is_cancelled = False + self.ops_dir = ops_dir + self.workflows_dir = workflows_dir + self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") + self.task = asyncio.create_task(self.start_managing()) + self.pubsubname = pubsubname + self.topic = topic + self.source = source + + def done_callback(task: Any) -> None: + self.task = None + try: + maybe_exception = task.exception() + if maybe_exception is not None: + self.logger.warning( + f"Task {task} for workflow run {self.name} failed " + f"with exception {maybe_exception}" + ) + except (asyncio.CancelledError, asyncio.InvalidStateError): + pass + + self.task.add_done_callback(done_callback) + + def build_workflow(self, input_items: OpIOType): + content = self.message.content + spec = WorkflowParser.parse_dict( + content.workflow, + ops_dir=self.ops_dir, + parameters_override=content.parameters, + ) + workflow = Workflow(spec) + patch_workflow_sources(input_items, workflow) + io_mapper = WorkflowIOHandler(workflow) + return workflow, io_mapper + + async def start_managing(self) -> None: + content = 
async def update_workflow(
    run_id: str,
    statestore: StateStore,
    new_status: RunStatus,
    reason: Optional[str] = None,
    dont_update: Callable[[RunStatus], bool] = RunStatus.finished,
) -> None:
    """Set the status of a stored workflow run unless ``dont_update`` vetoes it.

    Args:
        run_id: Key of the run in the state store.
        statestore: Store the run is read from and written back to.
        new_status: Status to record.
        reason: Optional explanation; stored as an empty string when falsy.
        dont_update: Predicate on the current status; when it returns true the
            run is left unchanged.
    """
    stored = RunConfig(**(await statestore.retrieve(run_id)))
    details = stored.details
    if dont_update(details.status):
        return
    details.status = new_status
    details.reason = reason or ""
    if new_status == RunStatus.failed:
        # A run failed through this path gets identical start and end times.
        details.start_time = details.end_time = datetime.now()
    await statestore.store(run_id, stored)
async def handle_manage_workflow_event(self, channel: str, event: v1.Event):
    """Handle an event received on the workflow request topic.

    Valid messages are passed to `manage_workflow`. Any exception raised while
    doing so marks the run as failed and the event is dropped.
    """
    header = extract_message_header_from_event(event)
    update_telemetry_context(header.current_trace_parent)

    @add_trace
    async def success_callback(message: WorkMessage) -> TopicEventResponse:
        try:
            if message.is_valid_for_channel(channel):
                await self.manage_workflow(message)
                return TopicEventResponse("success")
            self.logger.error(f"Received unsupported message {message}. Dropping it.")
            return TopicEventResponse("drop")
        except Exception as exc:
            failed_run = str(message.run_id)
            await self.fail_workflow(failed_run, f"{exc.__class__.__name__}: {exc}")
            self.logger.exception(
                f"Failed to submit workflow {message.run_id} "
                f"from event {event.id} for execution"
            )
            return TopicEventResponse("drop")

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("handle_manage_workflow_event"):
        return await accept_or_fail_event_async(event, success_callback, self._failure_callback)
async def handle_workflow_cancellation_message(self, message: WorkflowCancellationMessage):
    """Cancel the managed workflow run referenced by ``message``, if still running.

    Unknown runs and runs whose management task is already done only produce a
    warning.
    """
    run_id = message.run_id
    if run_id not in self._workflow_management_tasks:
        # We don't know this workflow run. Either this completed execution
        # already, or it doesn't exist. Log and ignore.
        self.logger.warning(
            f"Tried to cancel a workflow run from message {message}, "
            f"but the run doesn't exist. (run id: {run_id})"
        )
        return
    manager = self._workflow_management_tasks[run_id]
    if manager.task.done():
        self.logger.warning(
            f"Tried to cancel a workflow run from message {message}, "
            f"but the workflow has already finished. (run id: {run_id})"
        )
        return
    await manager.cancel()
@dapr_ready
async def run(self):
    """Start the app server and resume unfinished workflow runs.

    The server runs as a task next to `_resume_workflows`; this coroutine
    returns (or raises) once both have finished.
    """

    # Done callbacks are invoked by the event loop as plain functions. Declared
    # as `async def`, the call would merely create a coroutine object that is
    # never awaited, so the warning below would never be logged.
    def shutdown_callback(task: Any) -> None:
        try:
            maybe_exception = task.exception()
            if maybe_exception is not None:
                self.logger.warning(f"Server task failed with exception {maybe_exception}.")
        except (asyncio.CancelledError, asyncio.InvalidStateError):
            # Cancelled (or not finished) tasks have no exception to report.
            pass

    self.logger.info(f"Starting orchestrator listening on port {self.port}")
    server_task = asyncio.create_task(self.app.run_async(self.port))
    server_task.add_done_callback(shutdown_callback)
    resume_call = self._resume_workflows()
    await asyncio.gather(server_task, resume_call)
asdict(workflow.workflow_spec), run.parameters, user_input + ) + + update_telemetry_context(message.current_trace_parent) + with trace.get_tracer(__name__).start_as_current_span("re-submit-workflow"): + return cast(WorkflowExecutionMessage, message) + + async def _resume_workflows(self): + self.logger.debug("Searching for unfinished workflow runs") + try: + runs = await self.get_unfinished_workflows() + except Exception: + raise RuntimeError( + "Failed to fetch list of unfinished workflow runs. Aborting Execution." + ) + self.logger.debug(f"Found {len(runs)} unfinished workflow run(s)") + + unfinished_tasks = [] + for run in runs: + self.logger.debug(f"Resuming workflow run {run.id}") + try: + message = self.run_config_to_workflow_message(run) + self.logger.debug(f"Created workflow execution message for run id {run.id}") + unfinished_tasks.append( + asyncio.create_task(self.handle_workflow_execution_message(message)) + ) + except Exception: + self.logger.exception(f"Failed to resume execution for workflow run {run.id}") + raise + + await asyncio.gather(*unfinished_tasks) + + +async def main(): + parser = ArgumentParser(description="TerraVibes 🌎 Orchestrator") + parser.add_argument( + "--pubsubname", + type=str, + default=CONTROL_STATUS_PUBSUB, + help="The name of the publish subscribe component to use", + ) + parser.add_argument( + "--cache-topic", + type=str, + default=CACHE_PUBSUB_TOPIC, + help="The name of the topic to use to send control messages", + ) + parser.add_argument( + "--status-topic", + type=str, + default=STATUS_PUBSUB_TOPIC, + help="The name of the topic to use to receive status messages", + ) + parser.add_argument( + "--workflow-topic", + type=str, + default=WORKFLOW_REQUEST_PUBSUB_TOPIC, + help="The name of the topic to use to receive workflow execution requests", + ) + parser.add_argument( + "--port", + type=int, + default=int(settings.GRPC_APP_PORT), + help="The port to use to listen for HTTP requests from dapr", + ) + parser.add_argument( + 
"--debug", action="store_true", default=False, help="Whether to enable remote debugging" + ) + parser.add_argument( + "--debugger-port", + type=int, + default=5678, + help="The port on which to listen to the debugger", + ) + parser.add_argument( + "--otel-service-name", + type=str, + help="The name of the service to use for OpenTelemetry collector", + default="", + ) + parser.add_argument( + "--logdir", + type=str, + help="The directory on which to save logs", + default="", + ) + parser.add_argument( + "--max-log-file-bytes", + type=int, + help="The maximum number of bytes for a log file", + default=MAX_LOG_FILE_BYTES, + ) + parser.add_argument( + "--log-backup-count", + type=int, + help="The number of log files to keep", + required=False, + default=LOG_BACKUP_COUNT, + ) + parser.add_argument( + "--loglevel", + type=str, + help="The default log level to use", + default="INFO", + ) + options = parser.parse_args() + + appname = "terravibes-orchestrator" + configure_logging( + appname=appname, + logdir=options.logdir if options.logdir else None, + max_log_file_bytes=options.max_log_file_bytes, + log_backup_count=options.log_backup_count, + logfile=f"{appname}.log", + default_level=options.loglevel, + ) + + if options.otel_service_name: + setup_telemetry(appname, options.otel_service_name) + + if options.debug: + debugpy.listen(options.debugger_port) # type: ignore + logging.info(f"Debugger enabled and listening on port {options.debugger_port}") + + orchestrator = Orchestrator( + pubsubname=options.pubsubname, + cache_topic=options.cache_topic, + status_topic=options.status_topic, + new_workflow_topic=options.workflow_topic, + port=options.port, + ) + await orchestrator.run() + + +def main_sync(): + asyncio.run(main()) + + +if __name__ == "__main__": + main_sync() diff --git a/src/vibe_server/vibe_server/server.py b/src/vibe_server/vibe_server/server.py new file mode 100644 index 00000000..dfae3d1b --- /dev/null +++ b/src/vibe_server/vibe_server/server.py @@ -0,0 
+1,900 @@ +import asyncio +import logging +import os +from argparse import ArgumentParser, Namespace +from dataclasses import asdict +from datetime import datetime +from enum import auto +from typing import ( + Any, + Dict, + Final, + List, + Optional, + Tuple, + Union, + _type_repr, # type: ignore + cast, +) +from uuid import UUID, uuid4 + +import debugpy +import psutil +import pydantic +import requests +import uvicorn +import yaml +from dapr.conf import settings +from fastapi import Body, FastAPI, Path, Query, status +from fastapi.encoders import jsonable_encoder +from fastapi.responses import JSONResponse +from fastapi_versioning import VersionedFastAPI, version +from hydra_zen import instantiate +from opentelemetry import trace +from starlette.middleware.cors import CORSMiddleware +from strenum import StrEnum + +from vibe_common.constants import ( + ALLOWED_ORIGINS, + CONTROL_STATUS_PUBSUB, + DEFAULT_SECRET_STORE_NAME, + RUNS_KEY, + WORKFLOW_REQUEST_PUBSUB_TOPIC, +) +from vibe_common.dapr import dapr_ready +from vibe_common.messaging import WorkMessageBuilder, send +from vibe_common.secret_provider import DaprSecretConfig +from vibe_common.statestore import StateStore, TransactionOperation +from vibe_common.telemetry import ( + add_span_attributes, + add_trace, + setup_telemetry, + update_telemetry_context, +) +from vibe_core.datamodel import ( + SUMMARY_DEFAULT_FIELDS, + Message, + MetricsDict, + RunConfig, + RunConfigInput, + RunConfigUser, + RunDetails, + RunStatus, + SpatioTemporalJson, +) +from vibe_core.logconfig import LOG_BACKUP_COUNT, MAX_LOG_FILE_BYTES, configure_logging + +from .href_handler import BlobHrefHandler, HrefHandler, LocalHrefHandler +from .workflow import get_workflow_path, workflow_from_input +from .workflow import list_workflows as list_existing_workflows +from .workflow.input_handler import ( + build_args_for_workflow, + patch_workflow_sources, + validate_workflow_input, +) +from .workflow.parameter import ParameterResolver +from 
@add_trace
def summarize_runs(self, runs: List[RunConfig], fields: List[str] = SUMMARY_DEFAULT_FIELDS):
    """Summarizes a list of runs given a list of fields.

    Supports accessing members deeper in the object by using dots to separate levels.
    For example, to extract the "status" member from "details", use "details.status".
    Any depth is supported (e.g. "a.b.c").

    Args:
        runs: Runs to summarize.
        fields: Names of the fields to keep; dotted names address nested members.

    Returns:
        One dict per run, keyed by the requested field names (dotted names are
        kept verbatim as keys).

    Raises:
        KeyError: If a dotted field passes through a member that is ``None`` for
            some run, or if a nested key does not exist.
    """

    # Convert each run once; the result serves both the flat and the dotted fields.
    run_dicts = [asdict(r) for r in runs]
    summarized_runs = [{k: v for k, v in d.items() if k in fields} for d in run_dicts]
    for field in fields:
        if "." not in field:
            continue
        *prefixes, suffix = field.split(".")
        for i, src in enumerate(run_dicts):
            obj = src
            try:
                # Walk down every prefix level first; the suffix only exists at
                # the innermost level, so it must not be read before that.
                for prefix in prefixes:
                    obj = obj[prefix]
                summarized_runs[i][field] = obj[suffix]
            except TypeError as e:
                # We are trying to get a subfield from a field that
                # didn't exist in the first place. `obj` is None, so we
                # won't be able to get it here
                raise KeyError(
                    f"Workflow run with id {runs[i].id} does not have field {field}"
                ) from e
    return summarized_runs
wf.inputs_spec.items()}, + "outputs": {k: _type_repr(v) for k, v in wf.output_spec.items()}, + "parameters": param_defaults, + "description": asdict(wf.workflow_spec.description), + } + elif return_format == WorkflowReturnFormat.yaml: + with open(get_workflow_path(workflow)) as f: + yaml_content = yaml.safe_load(f) + return yaml_content + else: + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=asdict(Message(f"Invalid return format: {return_format}")), + ) + except FileNotFoundError: + return JSONResponse( + status_code=status.HTTP_404_NOT_FOUND, + content=asdict(Message(f'Workflow "{workflow}" not found')), + ) + except Exception as e: + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=asdict(Message(f"Internal server error: {str(e)}")), + ) + + @add_trace + async def list_runs( + self, + ids: Optional[List[UUID]], + page: Optional[int], + items: Optional[int], + fields: Optional[List[str]], + ) -> RunList: + def paginate( + things: List[Any], page: Optional[int] = 0, items: Optional[int] = 0 + ) -> List[Any]: + if items is None or items <= 0: + return things + if page is None or page <= 0: + page = 0 + return things[items * page : items * (page + 1)] + + ret: Union[List[str], List[Dict[str, Any]]] = [] + try: + if ids is None: + all_ids = await self.list_runs_from_store() + if fields is None: + return all_ids + + ret = self.summarize_runs(await self.get_bulk_runs_by_id(all_ids), fields) + else: + ids = cast(List[Any], ids) + if not all([isinstance(i, UUID) for i in ids]): + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=asdict(Message("Provided ids must be UUIDs")), + ) + if fields is None: + ret = self.summarize_runs(await self.get_bulk_runs_by_id(ids)) + else: + ret = self.summarize_runs(await self.get_bulk_runs_by_id(ids), fields) + + return paginate(ret, page, items) + except (KeyError, IndexError): + reason = f"Failed to get id(s) {ids}" + self.logger.debug(reason) + 
async def describe_run(
    self,
    run_id: UUID = Path(..., title="The ID of the workflow execution to get."),
):
    """Return the user-facing description of a single workflow run.

    Args:
        run_id: Identifier of the run to describe.

    Returns:
        The JSON-encodable run description produced by the href handler, or a
        404 ``JSONResponse`` when the run cannot be found.
    """
    try:
        run = (await self.get_bulk_runs_by_id([run_id]))[0]
        run_config_user = RunConfigUser.from_runconfig(run)
        return jsonable_encoder(self.href_handler.handle(run_config_user))
    # IndexError covers an empty bulk result; `list_runs` treats both error
    # types as "not found" as well.
    except (KeyError, IndexError):
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=asdict(Message(f'Workflow execution "{run_id}" not found')),
        )
unfinished workflow run.")), + ) + + message = WorkMessageBuilder.build_workflow_deletion(run_id) + + response = send( + message, + "rest-api", + CONTROL_STATUS_PUBSUB, + WORKFLOW_REQUEST_PUBSUB_TOPIC, + ) + + if not response: + raise RuntimeError("Failed to submit workflow deletion request.") + self.logger.debug(f"Successfully posted workflow deletion request for run {run_id}") + + return JSONResponse( + status_code=status.HTTP_200_OK, + content=asdict(Message(f"Requested deletion of workflow run {run_id}")), + ) + + async def create_run(self, runConfig: RunConfigInput) -> CreateRunResponse: + response: JSONResponse + try: + if ( + isinstance(runConfig.workflow, str) + and runConfig.workflow not in list_existing_workflows() + ): + raise ValueError(f'Workflow "{runConfig.workflow}" unknown') + + workflow = workflow_from_input(runConfig.workflow) + inputs_spec = workflow.inputs_spec + # Build and validate inputs + user_input = build_args_for_workflow(runConfig.user_input, list(inputs_spec)) + # Validate workflow inputs and potentially patch workflow for input fan-out + validate_workflow_input(user_input, inputs_spec) + patch_workflow_sources(user_input, workflow) + + run_ids: List[str] = await self.list_runs_from_store() + new_id, new_run = self.create_new_run(runConfig, run_ids) + add_span_attributes({"run_id": new_id}) + + if new_id is None: + raise RuntimeError("Failed to create new run id") + await self.update_run_state(run_ids, new_run) + + # Update run id with parsed workflow and user input + new_run.workflow = asdict(workflow.workflow_spec) + new_run.user_input = user_input + self.submit_work(new_run) + + response = JSONResponse( + status_code=status.HTTP_201_CREATED, + content=asdict( + Message( + id=new_id, + location=f"/runs/{new_id}", + message="Workflow created and queued for execution", + ) + ), + ) + except ( + ValueError, + pydantic.ValidationError, + requests.exceptions.RequestException, + ) as e: + self.logger.exception("Failed to submit workflow to 
def create_new_run(self, workflow: RunConfigInput, run_ids: List[str]):
    """Create a ``RunConfig`` with a fresh id from a submitted run configuration.

    The new id is also appended to ``run_ids`` (the list is modified in place).

    Args:
        workflow: Submitted run configuration.
        run_ids: List of known run ids; receives the new id.

    Returns:
        A ``(new_id, new_run)`` tuple.
    """
    new_id = str(uuid4())

    # `user_input` is taken from the object itself rather than from its
    # `asdict` conversion.
    run_fields = asdict(workflow)
    run_fields.pop("user_input", None)

    details = RunDetails()  # type: ignore
    # Set workflow submission time
    details.submission_time = datetime.utcnow()

    user_input = workflow.user_input
    run_fields.update(
        id=new_id,
        details=details,
        task_details={},
        user_input=user_input,
        spatio_temporal_json=(
            user_input if isinstance(user_input, SpatioTemporalJson) else None
        ),
    )

    new_run = RunConfig(**run_fields)
    run_ids.append(new_id)

    return new_id, new_run
class TerravibesAPI(FastAPI):
    """FastAPI application exposing ``TerravibesProvider`` operations over REST.

    All routes are registered with ``@version(0)`` and served through a
    ``VersionedFastAPI`` wrapper using the ``/v{major}`` prefix. CORS
    middleware is attached to the wrapper, and ``uvicorn_config`` is prepared
    so that ``run`` can serve the application.
    """

    uvicorn_config: uvicorn.Config
    terravibes: TerravibesProvider

    def __init__(
        self,
        href_handler: HrefHandler,
        allowed_origins: List[str] = ALLOWED_ORIGINS,
        host: str = "127.0.0.1",
        port: int = 8000,
        reload: bool = False,
        debug: bool = False,
        *args: Any,
        **kwargs: Any,
    ):
        """Build the provider, register the routes and prepare the uvicorn config.

        Args:
            href_handler: Handler forwarded to ``TerravibesProvider``.
            allowed_origins: Origins accepted by the CORS middleware.
            host: Address uvicorn binds to.
            port: Port uvicorn listens on.
            reload: Forwarded to ``uvicorn.Config(reload=...)``.
            debug: Forwarded to ``uvicorn.Config(debug=...)``.
            *args: Forwarded to ``FastAPI.__init__``.
            **kwargs: Forwarded to ``FastAPI.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.terravibes = TerravibesProvider(href_handler)

        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        self.logger.info(f"TerraVibes server: using {allowed_origins} as allowed origins")
        # NOTE(review): this assignment replaces any ``description`` passed
        # through ``kwargs`` to ``FastAPI.__init__`` above.
        # The text is Markdown; its lines are kept flush-left because Markdown
        # treats text indented by four or more spaces as a code block.
        self.description = """# TerraVibes REST API

TerraVibes is the execution engine of the FarmVibes platform, a
containerized, distributed system that can run machine learning models at scale.
TerraVibes uses Kubernetes for container orchestration and supports a variety of
machine learning frameworks, as well as various data sources.

With TerraVibes, farmers can run geospatial ingestion and machine learning models
in the cloud or on-premises, depending on their needs. The platform is
designed to be highly scalable and flexible, so users can start with a
small deployment and scale up as needed.

### Endpoints

- `GET /`: Root endpoint
- `GET /system-metrics`: Get system metrics

## Workflows

The base computation unit users interact with is a workflow. A workflow is a
collection of tasks that are arranged in a computational graph. Each task
represents a single operation, and the graph represents the dependencies
between the tasks. For example, a workflow might have a task that downloads
satellite imagery, a task that runs a machine learning model on the imagery,
and a task that uploads the results to a cloud storage bucket. The tasks are
executed in parallel, and the results of each task are passed to the next task
in the graph.

### Endpoints

- `GET /workflows`: List all workflows
- `GET /workflows/{workflow_name}`: Get a workflow by name, either as
JSON description, or YAML graph implementation

## Runs

Every time a workflow is executed, the API creates a new run. A run is a
specific instance of a workflow, and it is uniquely identified by a run ID.
The run ID is a UUID, and it is returned to the user when the workflow is
submitted. The run ID can be used to query the status of the workflow, and it
can be used to cancel the workflow.

### Endpoints

- `GET /runs`: Lists all the workflow runs currently in the system.
- `GET /runs/{run_id}`: Get information of a specific run.
- `POST /runs`: Submit a new workflow run.
- `POST /runs/{run_id}/cancel`: Cancel a workflow run.
"""

        self.openapi_tags = [
            {
                "name": "workflows",
                "description": (
                    "Operations on workflows, including listing, describing, "
                    "and obtaining workflow definition YAMLs."
                ),
                "externalDocs": {
                    "description": "FarmVibes.AI Workflow Documentation",
                    "url": (
                        "https://github.com/microsoft/farmvibes-ai/blob/main/documentation/"
                        "WORKFLOWS.md"
                    ),
                },
            },
            {
                "name": "runs",
                # No trailing comma inside the parentheses: the description
                # must be a plain string, not a one-element tuple.
                "description": (
                    "Operations on workflow runs, including submitting, listing, "
                    "describing, and cancelling runs."
                ),
            },
        ]

        @self.get("/")
        @version(0)
        async def terravibes_root() -> Message:
            """Root endpoint."""
            return await self.terravibes.root()

        @self.get("/system-metrics")
        @version(0)
        async def terravibes_metrics() -> MetricsDict:
            """Get system metrics, including CPU usage, memory usage, and storage disk space."""
            return self.terravibes.system_metrics()

        @self.get("/workflows", tags=["workflows"], response_model=None)
        @version(0)
        async def terravibes_list_workflows() -> WorkflowList:
            """List all workflows available in FarmVibes.AI."""
            return await self.terravibes.list_workflows()

        @self.get("/workflows/{workflow:path}", tags=["workflows"])
        @version(0)
        async def terravibes_describe_workflow(
            workflow: str = Path(
                ..., title="Workflow name", description="The name of the workflow to be described."
            ),
            return_format: str = Query(
                "description",
                title="Return format",
                description="The format to return the workflow in [description, yaml].",
            ),
        ):
            """Get a workflow by name, either as JSON description, or YAML graph implementation."""
            return await self.terravibes.list_workflows(workflow, return_format)

        @self.get("/runs", tags=["runs"], response_model=None)
        @version(0)
        async def terravibes_list_runs(
            ids: Optional[List[UUID]] = Query(
                None,
                description=(
                    "The list of run IDs to retrieve. If not provided, all runs are returned."
                ),
            ),
            page: Optional[int] = Query(0, description="The page number to retrieve."),
            items: Optional[int] = Query(0, description="The number of items per page."),
            fields: Optional[List[str]] = Query(
                None,
                description=(
                    "Fields to return alongside each run id. "
                    "If not provided, only run ids are returned."
                ),
            ),
        ) -> RunList:
            """List all the workflow runs currently in the system."""
            return await self.terravibes.list_runs(ids, page, items, fields)

        @self.get("/runs/{run_id}", tags=["runs"])
        @version(0)
        async def terravibes_describe_run(
            run_id: UUID = Path(
                ...,
                title="Run ID",
                description="The ID of the workflow execution to get.",
            ),
        ):
            """Get information of a specific run."""
            return await self.terravibes.describe_run(run_id)

        @self.post("/runs/{run_id}/cancel", tags=["runs"])
        @version(0)
        async def terravibes_cancel_run(
            run_id: UUID = Path(
                ...,
                title="Run ID",
                description="The ID of the workflow run to cancel.",
            ),
        ) -> JSONResponse:
            """Cancel a workflow run."""
            return await self.terravibes.cancel_run(run_id)

        @self.delete("/runs/{run_id}", tags=["runs"])
        @version(0)
        async def terravibes_delete_run(
            run_id: UUID = Path(
                ...,
                title="Run ID",
                description="The ID of the workflow run to delete.",
            ),
        ) -> JSONResponse:
            """Delete data associated with a workflow run (if not shared by other runs).

            For a detailed overview on how data is managed in FarmVibes.AI, please refer to the
            [documentation](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/CACHE.html).
            """
            return await self.terravibes.delete_run(run_id)

        @self.post("/runs/{run_id}/resubmit", tags=["runs"], response_model=None)
        @version(0)
        async def terravibes_resubmit_run(
            run_id: UUID = Path(
                ...,
                title="Run ID",
                description="The ID of the workflow run to resubmit.",
            ),
        ) -> CreateRunResponse:
            """Resubmit a workflow run."""
            return await self.terravibes.resubmit_run(run_id)

        @self.post("/runs", tags=["workflows", "runs"], response_model=None)
        @version(0)
        async def terravibes_create_run(
            runConfig: RunConfigInput = Body(
                default=None,
                example=RUN_CONFIG_SUBMISSION_EXAMPLE,
                description="The configuration and inputs of the workflow run to submit.",
            ),
        ) -> CreateRunResponse:
            """Submit a new workflow run."""
            return await self.terravibes.create_run(runConfig)

        # The versioned wrapper is the ASGI app that uvicorn serves; the
        # routes above are exposed under the "/v{major}" prefix.
        self.versioned_wrapper = VersionedFastAPI(
            self, version_format="{major}", prefix_format="/v{major}"
        )
        self.versioned_wrapper.add_middleware(
            CORSMiddleware,
            allow_origins=allowed_origins,
            # Credentials are only allowed when origins are listed explicitly.
            allow_credentials="*" not in allowed_origins,
            allow_methods=["*"],
            allow_headers=["*"],
        )
        self.uvicorn_config = uvicorn.Config(
            app=self.versioned_wrapper,
            host=host,
            port=port,
            reload=reload,
            debug=debug,
            log_config=None,
        )

    async def run(self):
        """Serve the versioned application with uvicorn until the server stops."""
        server = uvicorn.Server(self.uvicorn_config)
        await server.serve()
key_name=os.environ["BLOB_STORAGE_ACCOUNT_CONNECTION_STRING"], + ) + ) + except Exception: + storage_account_connection_string = "" + logger.exception( + "Failed to load blob storage account connection string from Dapr secret store. " + "Expect describing runs to fail due to an inability to resolve asset hrefs." + ) + return BlobHrefHandler( + connection_string=storage_account_connection_string, + ) + + +async def main() -> None: + parser = ArgumentParser(description="TerraVibes 🌎 REST API Server") + parser.add_argument("--host", type=str, default="0.0.0.0", help="IP address to listen on") + parser.add_argument( + "--port", + type=int, + default=int(settings.HTTP_APP_PORT), + help="Port to listen on", + ) + parser.add_argument( + "--debug", + default=False, + action="store_true", + help="Whether to enable debug support", + ) + parser.add_argument( + "--reload", + default=False, + action="store_true", + help="Whether to reload the server on file change", + ) + parser.add_argument( + "--debugger-port", + type=int, + default=5678, + help="The port on which to listen to the debugger", + ) + parser.add_argument( + "--terravibes-host-assets-dir", + type=str, + help="The asset directory on the host", + default="", + ) + parser.add_argument( + "--otel-service-name", + type=str, + help="The name of the service to use for OpenTelemetry collector", + default="", + ) + parser.add_argument( + "--logdir", + type=str, + help="The directory on which to save logs", + default="", + ) + parser.add_argument( + "--max-log-file-bytes", + type=int, + help="The maximum number of bytes for a log file", + default=MAX_LOG_FILE_BYTES, + ) + parser.add_argument( + "--log-backup-count", + type=int, + help="The number of log files to keep", + required=False, + default=LOG_BACKUP_COUNT, + ) + parser.add_argument( + "--loglevel", + type=str, + help="The default log level to use", + default="INFO", + ) + + options = parser.parse_args() + + appname = "terravibes-rest-api" + configure_logging( + 
class Sniffer:
    """Subscribes to dapr pubsub topics and logs every message received on them."""

    app: App
    topics: List[str]

    def __init__(self, pubsub: str, topics: List[str], port: int = settings.GRPC_APP_PORT):
        """Create the dapr app and register one logging handler per topic.

        Args:
            pubsub: Name of the dapr pubsub component to subscribe to.
            topics: Topics whose messages should be logged.
            port: Port passed to ``App.run`` when ``run`` is called.
        """
        self.app = App()
        self.port = port
        self.pubsub = pubsub
        self.topics = topics
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

        self.logger.info(f"Will subscribe to topics {topics}")
        for topic in self.topics:
            self._subscribe(topic)

    def _subscribe(self, topic: str) -> None:
        """Register a handler that logs events received on `topic`.

        The handler is created inside this method, not inside the loop in
        ``__init__``, so each handler captures its own ``topic``. A closure
        defined directly in the loop body would see the loop variable's final
        value and report every event under the last topic.
        """

        @self.app.subscribe(self.pubsub, topic)
        def log(event: v1.Event) -> None:
            self.log(event, topic)

    def log(self, event: v1.Event, topic: str) -> None:
        """Decode `event` into a work message and log it with its source and topic.

        Raises:
            RuntimeError: If the event cannot be converted to a work message.
        """
        try:
            message = event_to_work_message(event)
        except Exception as e:
            # Keep the decoding error as the explicit cause for debugging.
            raise RuntimeError(f"Failed to decode event with id {event.id}") from e
        self.logger.info(f"{event.source} => {topic}: {message}")

    @dapr_ready
    def run(self):
        """Start the dapr app on the configured port."""
        self.app.run(self.port)
def list_workflows() -> List[str]:
    """Return the names of all workflows found under the workflow directory.

    The directory returned by ``get_workflow_dir()`` is walked recursively.
    Every file with a ``.yaml`` extension yields one name: its path relative
    to the workflow directory, without the extension and with ``/`` as the
    separator (e.g. ``"group/my_workflow"``). An empty list is returned when
    the directory does not exist.
    """
    workflow_dir = get_workflow_dir()
    if not os.path.exists(workflow_dir):
        return []

    workflows: List[str] = []
    for dirpath, _, filenames in os.walk(workflow_dir):
        for filename in filenames:
            stem, extension = os.path.splitext(filename)
            if extension != ".yaml":
                continue
            # Path arithmetic instead of a regex built from the directory
            # name: regex metacharacters in the path are harmless, a trailing
            # separator on the directory is handled, and only the file's own
            # extension is removed (".yaml" inside a directory name stays).
            relative = os.path.relpath(os.path.join(dirpath, stem), workflow_dir)
            workflows.append(relative.replace(os.sep, "/"))
    return workflows
+ ) + + @classmethod + def _validate_description_against_nodes( + cls, desc_nodes: List[str], node_type: str, nodes: Dict[str, Any], workflow_name: str + ): + for name in desc_nodes: + if name not in nodes: + raise ValueError( + f"{name} in the workflow description does not match " + f"any {node_type} in workflow {workflow_name}" + ) + + @classmethod + def _validate_sources(cls, spec: WorkflowSpec): + for source_name in spec.sources.keys(): + cls._validate_node_against_description( + source_name, "Source", spec.description.inputs, spec.name + ) + + @classmethod + def _validate_sinks(cls, spec: WorkflowSpec): + for sink_name in spec.sinks.keys(): + cls._validate_node_against_description( + sink_name, "Sink", spec.description.outputs, spec.name + ) + + @classmethod + def _validate_parameters(cls, workflow_spec: WorkflowSpec): + param_resolver = ParameterResolver(workflow_spec.workflows_dir, workflow_spec.ops_dir) + parameters = param_resolver.resolve(workflow_spec) + param_descriptions = {k: unpack_description(v.description) for k, v in parameters.items()} + + for param_name in workflow_spec.parameters.keys(): + cls._validate_node_against_description( + param_name, "Parameter", param_descriptions, workflow_spec.name + ) + + @classmethod + def _validate_tasks(cls, workflow_spec: WorkflowSpec): + for task_name in workflow_spec.tasks.keys(): + cls._validate_node_against_description( + task_name, "Task", workflow_spec.description.task_descriptions, workflow_spec.name + ) + + @classmethod + def _validate_description(cls, spec: WorkflowSpec): + desc = spec.description + if not desc.short_description: + raise ValueError(f"Short description is missing in workflow {spec.name}.") + + # Make sure every node in the description matches to a source/sink/parameter + for desc_nodes, node_type, node in [ + (desc.inputs, "sources", spec.sources), + (desc.outputs, "sinks", spec.sinks), + (desc.parameters, "parameters", spec.parameters), + ]: + 
T = TypeVar("T")  # node type
V = TypeVar("V")  # edge label type
Edge = Tuple[T, T, V]  # (origin, destination, label)
Adjacency = Set[Tuple[T, V]]  # outgoing (destination, label) pairs of a node


class VisitStatus(IntEnum):
    """State of a node during the depth-first traversal."""

    new = 0
    visiting = 1
    visited = 2


class Graph(Generic[T, V]):
    """Directed graph with labelled edges, stored as an adjacency list."""

    adjacency_list: Dict[T, Adjacency[T, V]]

    def __init__(self):
        self.adjacency_list = {}

    def add_node(self, node: T):
        """Add `node` to the graph; warn and do nothing if it already exists."""
        if node in self.adjacency_list:
            warn(f"Trying to add already existing node {node} to graph. Ignoring.")
        else:
            self.adjacency_list[node] = set()

    def add_edge(self, origin: T, destination: T, label: V):
        """Add a labelled edge, creating (with a warning) any missing endpoint."""
        if origin not in self.adjacency_list:
            warn(f"Tried to add edge from {origin} to {destination}, but {origin} not in graph")
            self.add_node(origin)
        if destination not in self.adjacency_list:
            warn(
                f"Tried to add edge from {origin} to {destination}, but {destination} not in graph"
            )
            self.add_node(destination)
        self.adjacency_list[origin].add((destination, label))

    def relabel(self, edge: Edge[T, V], new_label: V):
        """Changes an existing edge's label to `new_label`.

        Raises `KeyError` if the edge is not in the graph.
        """
        self.adjacency_list[edge[0]].remove((edge[1], edge[2]))
        self.adjacency_list[edge[0]].add((edge[1], new_label))

    @property
    def nodes(self) -> List[T]:
        """All nodes, in insertion order."""
        return list(self.adjacency_list.keys())

    @property
    def edges(self) -> List[Edge[T, V]]:
        """All edges as `(origin, destination, label)` tuples."""
        return [
            (origin, destination[0], destination[1])
            for origin, destinations in self.adjacency_list.items()
            for destination in destinations
        ]

    def neighbors(self, vertex: T) -> Set[T]:
        """Nodes reachable from `vertex` through a single outgoing edge."""
        return {e[0] for e in self.adjacency_list[vertex]}

    def edges_from(self, vertex: T) -> Iterable[Edge[T, V]]:
        """Outgoing edges of `vertex` as `(origin, destination, label)` tuples."""
        return [(vertex, *dst) for dst in self.adjacency_list[vertex]]

    def zero_in_degree_nodes(self) -> Iterable[T]:
        """Nodes that are not the destination of any edge."""
        in_degrees: Dict[T, int] = {k: 0 for k in self.adjacency_list}
        for destinations in self.adjacency_list.values():
            for destination in destinations:
                in_degrees[destination[0]] += 1
        return [k for k, v in in_degrees.items() if v == 0]

    def _dfs_impl(
        self,
        vertex: T,
        visited: Dict[T, Tuple[VisitStatus, int]],
        level: int = 0,
        visit: Callable[[int, T, VisitStatus], None] = lambda i, v, s: None,
    ) -> None:
        """Depth-first traversal from `vertex`, reporting progress through `visit`.

        `visited` maps each node to `(status, level)`. This method only reads
        it; the `visit` callback is expected to keep it up to date (as
        `topological_sort` does). A node is traversed again when it is reached
        at a deeper level than the one recorded for it.

        Raises:
            ValueError: If a node that is still being visited is reached again,
                which means the graph has a cycle.
        """
        if len(visited) == 0:
            for v in self.nodes:
                visited[v] = (VisitStatus.new, 0)

        # Already fully visited at a deeper level: nothing new to record.
        if visited[vertex][0] == VisitStatus.visited and level < visited[vertex][1]:
            return

        visit(level, vertex, VisitStatus.visiting)
        for neighbor in self.neighbors(vertex):
            try:
                if visited[neighbor][0] == VisitStatus.visiting:
                    raise ValueError(f"Graph has a cycle with at least node {neighbor}")
                elif visited[neighbor][0] == VisitStatus.new or (level + 1 > visited[neighbor][1]):
                    # Haven't visited this, or need to revisit at a higher level
                    self._dfs_impl(neighbor, visited, level + 1, visit)
            except KeyError:
                # We just reached a node we didn't even know existed
                # This is probably a terminal node
                warn(f"Found node {neighbor}, but it is not in the list of nodes.")
                self._dfs_impl(neighbor, visited, level + 1, visit)

        visit(level, vertex, VisitStatus.visited)

    def has_cycle(self) -> bool:
        """Return True if the graph contains a cycle, False otherwise."""
        try:
            self.topological_sort()
            return False
        except ValueError as e:
            if "cycle" in str(e):
                return True
            raise

    def topological_sort(self) -> Iterable[List[T]]:
        """Performs topological sort in a graph.

        Returns an iterable of lists of nodes, one list per level in
        increasing order. A node's level is the deepest level at which the
        traversal from the zero in-degree nodes reached it, so every node
        appears after all the nodes that have an edge into it.

        Raises:
            ValueError: If the graph has a cycle. The message always contains
                the word "cycle", which `has_cycle` relies on.
        """
        visited: Dict[T, Tuple[VisitStatus, int]] = {k: (VisitStatus.new, 0) for k in self.nodes}

        def visit(level: int, vertex: T, status: VisitStatus):
            visited[vertex] = status, level

        for source in self.zero_in_degree_nodes():
            assert visited[source][0] == VisitStatus.new, f"Visited source {source} more than once"
            visit(0, source, VisitStatus.visiting)
            for neighbor in self.neighbors(source):
                self._dfs_impl(neighbor, level=1, visit=visit, visited=visited)
            visit(0, source, VisitStatus.visited)

        # A node that was never reached has no zero in-degree ancestor.
        # Walking back through its predecessors therefore never ends, which in
        # a finite graph means it lies on, or downstream of, a cycle.
        # Disconnected components cannot cause this: each acyclic component
        # has its own zero in-degree node.
        unreached = [
            node for node, (state, _) in visited.items() if state != VisitStatus.visited
        ]
        if unreached:
            raise ValueError(
                "Graph has a cycle: not all nodes were visited in topological sort. "
                f"Nodes {unreached} cannot be reached from any node with zero in-degree."
            )

        ordering: Dict[int, List[T]] = defaultdict(list)
        for node, (_, level) in visited.items():
            ordering[level].append(node)
        return (ordering[k] for k in sorted(ordering.keys()))

    def __iter__(self) -> Iterator[List[T]]:
        """Iterate over the levels produced by `topological_sort`."""
        return iter(self.topological_sort())
def recompute_parallelism(workflow: Workflow):
    """Reset all edge labels and recompute the workflow's fan-out/fan-in edges.

    Each edge is relabelled with a copy of its label whose last field is
    replaced by ``EdgeType.single``. The fan-out and fan-in edges are then
    looked up again and handed to ``workflow._update_edges``.
    """
    for edge in workflow.edges:
        current_label = edge[-1]
        # Keep every field of the label except the last one.
        kept_fields = current_label[:-1]
        workflow.relabel(edge, EdgeLabel(*kept_fields, EdgeType.single))
    fan_out_edges, fan_in_edges = workflow._find_fan_out_fan_in_edges()
    workflow._update_edges(fan_out_edges, fan_in_edges)
def build_args_for_workflow(
    user_input: Union[List[Any], Dict[str, Any], SpatioTemporalJson], wf_inputs: List[str]
) -> OpIOType:
    """
    Get user input and transform it into a dict where the keys match the workflow sources

    Args:
        user_input: The input provided by the user. Either a dictionary keyed
            by source name, or a single input for a single-source workflow.
        wf_inputs: Names of the workflow sources.

    Returns:
        A dictionary whose keys are the workflow source names. A dictionary
        whose keys already match the sources is returned unchanged.

    Raises:
        ValueError: If the input cannot be assigned to a source because the
            workflow has several sources (and the keys do not match them) or
            has no sources at all.
    """
    # If all the keys match, there is nothing to do
    if isinstance(user_input, dict) and set(wf_inputs) == set(user_input.keys()):
        return user_input
    # Without any source there is nothing the input could be assigned to.
    # Fail with the same error type as other bad inputs instead of letting
    # the `wf_inputs[0]` lookup below raise an IndexError.
    if not wf_inputs:
        raise ValueError(
            "User input was provided but the workflow has no sources to assign it to."
        )
    # Check if there is only one source. If that's the case, assign input to it, otherwise break
    if len(wf_inputs) > 1:
        raise ValueError(
            "User input does not specify workflow sources and workflow has multiple sources: "
            f"{', '.join(wf_inputs)}. A dictionary with matching keys is required."
        )
    # Check if it's a spatiotemporal json (geom + time range)
    # If that's the case we generate a DataVibe with that info
    if isinstance(user_input, SpatioTemporalJson):
        user_input = gen_stac_item_from_bounds(
            user_input.geojson,  # type: ignore
            user_input.start_date,  # type: ignore
            user_input.end_date,  # type: ignore
        )
    return {wf_inputs[0]: user_input}
def patch_workflow_sources(user_input: OpIOType, workflow: Workflow):
    """Try to patch every workflow source for the input it received.

    Each source in ``workflow.inputs_spec`` is passed, together with its
    entry in `user_input`, to ``patch_workflow_source``. All sources are
    attempted before any failure is reported.

    Raises:
        ValueError: Naming every source for which ``patch_workflow_source``
            raised ``ValueError``.
    """

    def is_rejected(source_name) -> bool:
        # The lookup stays outside the `try` so a missing key propagates.
        source_input = user_input[source_name]
        try:
            patch_workflow_source(source_input, workflow, source_name)
        except ValueError:
            return True
        return False

    rejected = [source_name for source_name in workflow.inputs_spec if is_rejected(source_name)]
    if rejected:
        raise ValueError(
            f"Found list of inputs for workflow sources {rejected} that do not support lists"
        )
def _get_op_params(self, op_spec: OperationSpec, task_name: str) -> List["Parameter"]:
    """
    Flatten the (possibly nested) parameters of an op into a list of `Parameter`s.

    Nested dictionaries are walked recursively and every leaf is named with the
    dot-joined path of its keys (e.g. `outer.inner.leaf`). Descriptions and
    defaults are looked up along the same path; a missing description yields
    `None`, while a missing default raises `KeyError`.
    """

    def flatten(
        params: Dict[str, Any],
        descriptions: Optional[Dict[str, Any]],
        defaults: Dict[str, Any],
        prefix: str = "",
    ):
        for key, value in params.items():
            desc = descriptions.get(key) if descriptions is not None else None
            default = defaults[key]
            # Build the full path here so it is carried down the recursion. Passing only
            # `key` would drop the outer levels for parameters nested 3+ levels deep.
            name = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                assert isinstance(desc, dict) or desc is None
                assert isinstance(default, dict)
                yield from flatten(value, desc, default, prefix=name)
            else:
                assert isinstance(desc, str) or desc is None
                yield Parameter(
                    name=name, task=task_name, value=value, default=default, description=desc
                )

    return list(
        flatten(op_spec.parameters, op_spec.description.parameters, op_spec.default_parameters)
    )
class Parameter:
    """
    A workflow or op parameter, plus the task parameters that reference it.

    `default` and `description` fall back to the values exposed by the children
    when the parameter itself does not define them (i.e. its own value is `None`).
    """

    def __init__(
        self,
        name: str,
        task: str,
        value: Any,
        default: Any,
        description: Optional[Union[str, Dict[str, str]]],
    ) -> None:
        self.name = name
        self.task = task
        self._value = value
        self._default = default
        self._description = description
        self.childs: List["Parameter"] = []

    def add_child(self, child: "Parameter"):
        """Register a parameter that references this one."""
        self.childs.append(child)

    def _resolve(self, attr: str, private_attr: str):
        """
        Return our own value for `private_attr`, or gather `attr` from the children.

        Children are only consulted when our value is `None` and there is at least
        one child. Their values (tuples are flattened one level) are de-duplicated
        in first-seen order; a single distinct value is returned bare, otherwise a
        tuple is returned.
        """
        own = getattr(self, private_attr)
        if own is not None or not self.childs:
            return own

        # A list with membership tests keeps first-seen order and also works for
        # values that cannot be hashed
        gathered: List[Any] = []
        for child in self.childs:
            candidate = getattr(child, attr)
            values = candidate if isinstance(candidate, tuple) else (candidate,)
            for item in values:
                if item in gathered:
                    continue
                gathered.append(item)
        return gathered[0] if len(gathered) == 1 else tuple(gathered)

    @property
    def default(self) -> Any:
        return self._resolve("default", "_default")

    @property
    def description(self) -> Union[str, Tuple[str], None]:
        resolved = self._resolve("description", "_description")
        if not isinstance(resolved, tuple):
            return resolved
        # Children without a description contribute `None`; drop those and collapse
        meaningful = tuple(d for d in resolved if d is not None)
        if not meaningful:
            return None
        if len(meaningful) == 1:
            return meaningful[0]
        return meaningful

    @property
    def reference(self) -> Optional[str]:
        return get_parameter_reference(self._value, self.task)
class MessageRouter:
    """
    Demultiplex messages from a single input queue into per-request queues.

    A background task (`route_messages`) pulls messages from `inqueue` and puts
    each one on the queue keyed by its `parent_id`; consumers read them with
    `get(request_id)`. The routing task is created in `__init__`, so instances
    must be built while an event loop is running.
    """

    def __init__(self, inqueue: "asyncio.queues.Queue[WorkMessage]"):
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        # Queues are created lazily, the first time a request id is seen
        self.message_map: Dict[str, "asyncio.queues.Queue[WorkMessage]"] = defaultdict(
            asyncio.queues.Queue
        )
        self.inqueue = inqueue
        self.should_stop = False
        self.task = asyncio.create_task(self.route_messages())

        def done_callback(task: Any) -> None:
            # Drop our reference and surface failures of the routing loop in the logs
            self.task = None
            try:
                maybe_exception = task.exception()
                if maybe_exception is not None:
                    self.logger.warning(
                        f"MessageRouter task {task} encountered an exception: {maybe_exception}"
                    )
            except (asyncio.CancelledError, asyncio.InvalidStateError):
                pass

        self.task.add_done_callback(done_callback)

    async def route_messages(self):
        """Move messages from `inqueue` to their per-request queue until `should_stop` is set."""
        while not self.should_stop:
            try:
                # The timeout lets the loop re-check `should_stop` periodically
                msg = await asyncio.wait_for(self.inqueue.get(), timeout=SLEEP_S)
                self.message_map[msg.parent_id].put_nowait(msg)
                self.inqueue.task_done()
            except asyncio.TimeoutError:
                pass

    async def get(self, request_id: str, block: bool = True) -> WorkMessage:
        """
        Return the next message routed to `request_id`.

        With `block=False`, `asyncio.QueueEmpty` is raised if nothing is queued.
        """
        if block:
            msg = await self.message_map[request_id].get()
        else:
            msg = self.message_map[request_id].get_nowait()
        return msg

    def task_done(self, request_id: str) -> None:
        """Mark a message of `request_id` as processed; extra calls are logged, not raised."""
        try:
            self.message_map[request_id].task_done()
        except ValueError:
            self.logger.exception(
                "task_done() called more times than there were items in the queue. "
                "This indicates a correctness issue and should be fixed. I'm ignoring "
                "it for now, though."
            )

    def clear(self) -> None:
        """Discard every message still waiting in the per-request queues."""
        for queue in self.message_map.values():
            while not queue.empty():
                try:
                    queue.get_nowait()
                    queue.task_done()
                except asyncio.QueueEmpty:
                    pass

    def __len__(self) -> int:
        """Number of messages not yet consumed, routed or still in the input queue."""
        return sum(q.qsize() for q in self.message_map.values()) + self.inqueue.qsize()

    def __del__(self):
        # `task` is missing when `__init__` failed before creating it (e.g. no running
        # event loop), and finalizers still run for partially built instances
        task = getattr(self, "task", None)
        if task and not task.done():
            task.cancel()
            self.task = None
def _process_reply(self, request: WorkMessage, reply: WorkMessage) -> OpIOType:
    """
    Turn a reply into the op output, or fail if the reply reports an error.

    Error replies are handed to `_handle_failure`, which raises `RuntimeError`.
    Any other reply is treated as an execute reply and its output is returned.
    """
    assert (
        reply.header.type != MessageType.execute_request
    ), f"Received invalid message {reply.id}"
    assert (
        reply.header.parent_id
    ), f"Received invalid reply {reply.id} with empty parent_id. (run id {reply.run_id})"

    is_error = reply.header.type == MessageType.error
    if not is_error:
        content = cast(ExecuteReplyContent, reply.content)
        self.logger.debug(
            f"Received execute reply for run id {reply.run_id} "
            f"(op name {content.cache_info.name}, op hash {content.cache_info.hash})."
        )
        return content.output
    # Never returns: the failure is logged and re-raised as RuntimeError
    self._handle_failure(cast(ExecuteRequestMessage, request), reply)
async def _wait_for_reply(self, request: ExecuteRequestMessage) -> WorkMessage:
    """
    Poll the message router until a message for `request` is available.

    The router is queried without blocking. When nothing is queued, the
    coroutine sleeps for `SLEEP_S` seconds and checks for cancellation before
    polling again. Raises `CancelledOpError` if the run was cancelled.
    """
    while True:
        try:
            reply = await self.message_router.get(request.id, block=False)
        except asyncio.QueueEmpty:
            await asyncio.sleep(SLEEP_S)
            if not self.is_cancelled:
                continue
            raise CancelledOpError()
        else:
            return reply
class WorkflowChange(StrEnum):
    """
    State changes reported to a `WorkflowCallback` while a workflow runs.

    The change is passed as the first argument of the callback; extra details
    travel as keyword arguments (e.g. `tasks`, `task`, `subtask_idx`,
    `num_subtasks`, `reason`).
    """

    # NOTE(review): values are produced by `auto()` on fastapi_utils' StrEnum, presumably
    # the member names themselves; confirm before relying on the raw strings.
    # `cast` is only there for static typing of the members.

    # Workflow-level changes. WORKFLOW_STARTED is reported with the list of task names,
    # WORKFLOW_FAILED with a `reason` string
    WORKFLOW_STARTED = cast("WorkflowChange", auto())
    WORKFLOW_FINISHED = cast("WorkflowChange", auto())
    WORKFLOW_FAILED = cast("WorkflowChange", auto())
    WORKFLOW_CANCELLED = cast("WorkflowChange", auto())
    # Reported once per op, together with the number of subtasks it fans out to
    TASK_STARTED = cast("WorkflowChange", auto())
    # Subtask-level changes, identified by `task` and `subtask_idx`
    SUBTASK_QUEUED = cast("WorkflowChange", auto())
    SUBTASK_RUNNING = cast("WorkflowChange", auto())
    SUBTASK_FINISHED = cast("WorkflowChange", auto())
    SUBTASK_FAILED = cast("WorkflowChange", auto())
    SUBTASK_PENDING = cast("WorkflowChange", auto())
@staticmethod
def align(**kwargs: Any) -> Iterable[Tuple[Any, ...]]:
    """
    Pair up the given sequences element by element, broadcasting singletons.

    Sequences of length one are repeated for every position. All other
    sequences must share the same length, otherwise `ValueError` is raised
    (lazily, when the generator is first advanced). Each yielded tuple follows
    the order of the keyword arguments.
    """
    sizes = {}
    for name, seq in kwargs.items():
        if len(seq) != 1:
            sizes[name] = len(seq)

    distinct_sizes = set(sizes.values())
    if len(distinct_sizes) > 1:
        error_str = ", ".join(f"'{k}': {v}" for k, v in sizes.items())
        raise ValueError(f"Unable to pair sequences of different sizes - {error_str}")

    # Only singletons (or nothing at all) means a single aligned tuple
    steps = max(distinct_sizes) if distinct_sizes else 1
    for idx in range(steps):
        row = []
        for seq in kwargs.values():
            row.append(seq[idx] if len(seq) > 1 else seq[0])
        yield tuple(row)
run(self, op_input: OpIOType, run_id: UUID) -> List[OpIOType]: + if isinstance(self.op.spec, InputFanOut): + self.logger.info(f"Bypassing input fan-out node {self.op.name}") + await self.update_state(WorkflowChange.TASK_STARTED, task=self.op.name, num_subtasks=1) + await self.update_state( + WorkflowChange.SUBTASK_FINISHED, task=self.op.name, subtask_idx=0 + ) + return [{self.op.spec.output_port: op_input[self.op.spec.input_port]}] + inputs: List[OpIOType] = [ + {k: v for k, v in zip(op_input.keys(), input)} for input in self.fan_out(op_input) + ] + await self.update_state( + WorkflowChange.TASK_STARTED, task=self.op.name, num_subtasks=len(inputs) + ) + self.logger.info( + f"Will run op {self.op.name} with {len(inputs)} different input(s). " + f"(run id: {run_id})" + ) + + async def sub_run(args: Tuple[int, OpIOType]) -> OpIOType: + idx, input = args + try: + self.logger.debug( + f"Executing task {idx + 1}/{len(inputs)} of op {self.op.name}. " + f"(run id: {run_id})" + ) + await self.update_state( + WorkflowChange.SUBTASK_QUEUED, task=self.op.name, subtask_idx=idx + ) + ret = await self.run_task(self.op, input, run_id, idx) + self.logger.debug( + f"Successfully executed task {idx + 1}/{len(inputs)} of op {self.op.name}. " + f"(run id: {run_id})" + ) + await self.update_state( + WorkflowChange.SUBTASK_FINISHED, task=self.op.name, subtask_idx=idx + ) + return ret + except Exception as e: + self.logger.exception( + f"Failed to execute task {idx + 1}/{len(inputs)} of op {self.op.name}. 
" + f"(run id: {run_id})" + ) + await self.update_state( + WorkflowChange.SUBTASK_FAILED, + task=self.op.name, + subtask_idx=idx, + reason=f"{e.__class__.__name__}: {e}", + ) + raise + + results = await asyncio.gather(*[sub_run(args) for args in enumerate(inputs)]) + return results + + +class WorkflowRunner(ABC): + workflow: Workflow + update_state: WorkflowCallback + logger: logging.Logger + io_mapper: WorkflowIOHandler + io_handler: TaskIOHandler + is_cancelled: bool + + def __init__( + self, + workflow: Workflow, + io_mapper: WorkflowIOHandler, + update_state_callback: WorkflowCallback = NoOpStateChange, + **_: Any, + ): + self.workflow = workflow + self.update_state = update_state_callback + self.io_mapper = io_mapper + self.is_cancelled = False + + self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") + + async def cancel(self): + await self._report_state_change(WorkflowChange.WORKFLOW_CANCELLED) + self.is_cancelled = True + + @abstractmethod + async def _run_op_impl( + self, op: GraphNodeType, input: OpIOType, run_id: UUID, subtask_idx: int + ) -> OpIOType: + raise NotImplementedError + + async def _run_graph_impl(self, input: OpIOType, run_id: UUID) -> OpIOType: + self.io_handler.add_sources(input) + for ops in self.workflow: + self.logger.info(f"Will run ops {ops} in parallel. 
async def _run_ops(self, ops: List[GraphNodeType], run_id: UUID):
    """
    Run one level of the workflow: submit every op as a task and wait for them.

    Each op gets its own `OpParallelism`, built from the labels of the edges
    that arrive at it. Results are collected by `_monitor_futures`. A garbage
    collection pass is forced at the end, whether or not the level succeeded.
    """
    try:
        op_parallelism = {}
        tasks: List[Tuple[GraphNodeType, "asyncio.Task[List[OpIOType]]"]] = []
        for op in ops:
            incoming = [edge[LABEL] for edge in self.workflow.edges if edge[DESTINATION] == op]
            parallelism = OpParallelism(
                incoming,
                op,
                self._run_op_impl,
                update_state_callback=self.update_state,
            )
            op_parallelism[op.name] = parallelism
            submission = self._submit_op(op, run_id, parallelism)
            tasks.append((op, asyncio.create_task(submission, name=op.name)))
        await self._monitor_futures(tasks, run_id, op_parallelism)
        for _, pending in tasks:
            if not pending.done():
                pending.cancel()
        del tasks
    finally:
        # The garbage collector seems to be a bit lazy, so we need to force it to collect
        # anything that's been leftover from previous executions
        collected = gc.collect()
        self.logger.debug(
            f"Garbage collector collected {collected} objects after running ops {ops} "
            f"in run {run_id}."
        )
async def run(self, input_items: OpIOType, run_id: "UUID | None" = None) -> OpIOType:
    """
    Run the workflow for `input_items` and return the mapped sink outputs.

    `run_id` identifies the run in logs and in the calls that execute ops; a
    fresh UUID is generated for every call that does not provide one. An empty
    dict is returned when the run was cancelled.

    Any exception is logged, reported as `WorkflowChange.WORKFLOW_FAILED` and
    re-raised.
    """
    # A `uuid4()` default in the signature would be evaluated once, at definition time,
    # making every defaulted run share the same id
    if run_id is None:
        run_id = uuid4()
    try:
        # Initializing task IO handler for this specific run.
        self.io_handler = TaskIOHandler(self.workflow)
        output = await self._run_graph(self.io_mapper.map_input(input_items), run_id)
        return self.io_mapper.map_output(output) if not self.is_cancelled else {}
    except Exception as e:
        self.logger.exception(f"Failed to run workflow {self.workflow.name} (run id: {run_id})")
        await self._report_state_change(WorkflowChange.WORKFLOW_FAILED, reason=str(e))
        raise
    finally:
        # The handler is missing if its construction failed; an unconditional `del`
        # would then raise AttributeError and hide the original error
        if hasattr(self, "io_handler"):
            del self.io_handler
def _attach_input_port(self, node: GraphNodeType, input_port: str, io: List[InnerIOType]):
    """
    Bind the list `io` to the input port `input_port` of `node`.

    The list object itself is stored (not a copy), so values appended to it
    later are visible through the input map. Raises `ValueError` if the port
    is already attached.
    """
    node_inputs = self.input_map.setdefault(node, {})
    if input_port not in node_inputs:
        node_inputs[input_port] = io
        return
    raise ValueError(
        f"Tried to attach input port {node.name}.{input_port} but it is already attached"
    )
def add_sources(self, values: OpIOType):
    """
    Feed the workflow sources with `values`.

    `values` must have exactly one entry per source port, keyed by
    `"<task name>.<port>"`. Each value is appended to the list bound to its
    port. The mapping passed by the caller is not modified.

    Raises `ValueError` if the number of values differs from the number of
    source ports, or if a source port has no matching key.
    """
    if len(values) != sum(len(ports) for ports in self.source_map.values()):
        raise ValueError("Tried to add different number of values to workflow")

    # Consume a shallow copy: popping from `values` itself would empty the caller's dict
    remaining = dict(values)
    for task, ports in self.source_map.items():
        for port in ports:
            key = task.name + "." + port
            try:
                value = remaining.pop(key)
            except KeyError:
                raise ValueError(f"Unable to find source {key} for running workflow")
            ports[port].append(value)

    if remaining:
        raise ValueError(f"Tried to add unknown values {remaining.keys()} to workflow")
class WorkflowIOHandler:
    """Translate between the workflow's public source/sink names and node ports."""

    def __init__(self, workflow: Workflow):
        self.workflow = workflow

    def map_input(self, input_items: OpIOType) -> OpIOType:
        """
        Key the user inputs by the node ports they feed.

        A source mapped to several nodes has its input repeated for each of
        them. A source missing from `input_items` raises `KeyError`.
        """
        mapped: OpIOType = {}
        for source_name, nodes in self.workflow.source_mappings.items():
            for node in nodes:
                mapped[node] = input_items[source_name]
        return mapped

    def map_output(self, output_items: OpIOType) -> OpIOType:
        """Key the node outputs by sink name, keeping only the outputs mapped to a sink."""
        mapped: OpIOType = {}
        for sink_name, node_port in self.workflow.sink_mappings.items():
            mapped[sink_name] = output_items[node_port]
        return mapped
def split_task_name_port(edge: str) -> Tuple[str, str]:
    """
    Split a dotted `task[.inner...].port` reference into `(task, port)`.

    Only the first and the last dot-separated components are kept. A name
    without dots is returned as both elements.
    """
    # `str.split` always returns at least one element, so both lookups are safe
    components = edge.split(".")
    return (components[0], components[-1])
def __post_init__(self):
    """
    Normalize fields given as plain dicts and fill in missing task descriptions.

    Edges, tasks and the description are converted to `WorkflowSpecEdge`,
    `WorkflowSpecNode` and `TaskDescription` when they arrive as dicts. Every
    task without an entry in `description.task_descriptions` is then loaded
    and its short description is used.
    """
    for idx, edge in enumerate(self.edges):
        if isinstance(edge, dict):
            self.edges[idx] = WorkflowSpecEdge(**edge)

    # Only values of existing keys are replaced, which is safe while iterating
    for name, node in self.tasks.items():
        if isinstance(node, dict):
            self.tasks[name] = WorkflowSpecNode(**node)

    if isinstance(self.description, dict):
        self.description = TaskDescription(**self.description)

    for task_name, node_spec in self.tasks.items():
        if task_name not in self.description.task_descriptions:
            spec = node_spec.load(self.ops_dir, self.workflows_dir)
            if isinstance(spec.description, dict):
                spec.description = TaskDescription(**spec.description)
            self.description.task_descriptions[task_name] = spec.description.short_description
@classmethod
def _parse_nodespec(
    cls, nodespec: Dict[str, Union[str, Dict[str, Any]]], workflow_name: str, task_name: str
) -> WorkflowSpecNode:
    """Build a ``WorkflowSpecNode`` from the YAML mapping that describes one task.

    Args:
        nodespec: Mapping for a single task. It must contain either a
            ``workflow`` or an ``op`` key; ``workflow`` takes precedence when
            both are present.
        workflow_name: Name of the workflow that owns the task (stored as the
            node's ``parent``).
        task_name: Name of the task, used in error messages.

    Returns:
        The node specification. ``parameters`` defaults to an empty dict and
        ``op_dir`` defaults to the op/workflow name.

    Raises:
        ValueError: If neither ``op`` nor ``workflow`` is present, or if the
            mapping contains fields that are not accepted for its task type.
        TypeError: If the op/workflow name or ``op_dir`` is not a string, or
            ``parameters`` is not a dictionary.
    """
    # `task_type` instead of `type` so the builtin is not shadowed.
    if "workflow" in nodespec:
        task_type = TaskType.workflow
        possible_fields = cls.wf_spec_fields
    elif "op" in nodespec:
        task_type = TaskType.op
        possible_fields = cls.op_spec_fields
    else:
        raise ValueError(f"Task specification is missing fields 'op' or 'workflow': {nodespec}")

    task = nodespec[task_type]
    check_config_fields(nodespec, possible_fields, "Task", task_name)

    # Check field types
    if not isinstance(task, str):
        raise TypeError(f"'{task_type}' field of task {task_name} is not a string")
    if "parameters" in nodespec and not isinstance(nodespec["parameters"], dict):
        raise TypeError(f"'parameters' field of task {task_name} is not a dictionary")
    if "op_dir" in nodespec and not isinstance(nodespec["op_dir"], str):
        # The check is for `str`; the message previously claimed "dictionary".
        raise TypeError(f"'op_dir' field of task {task_name} is not a string")

    return WorkflowSpecNode(
        task=task,
        type=task_type,
        parameters=cast(Dict[str, Any], nodespec.get("parameters", {})),
        op_dir=cast(str, nodespec.get("op_dir", task)),
        parent=workflow_name,
    )
def parse_edge_string(edge_string: str, maxsplit: int = 1) -> Tuple[str, str]:
    """Split a dotted reference into a ``(prefix, last_component)`` pair.

    The string is split on ``"."`` at most ``maxsplit`` times. The last piece
    is returned as the second element and all preceding pieces, re-joined with
    dots, as the first. With the default ``maxsplit=1`` the cut happens at the
    first dot; with ``maxsplit=-1`` it effectively happens at the last dot.
    A string without dots yields an empty prefix.
    """
    *leading, last = edge_string.split(".", maxsplit=maxsplit)
    return ".".join(leading), last
@classmethod
def _validate_sources(cls, spec: WorkflowSpec) -> bool:
    """Validate the ``sources`` section of a workflow spec.

    Checks, in order: the field is a mapping from strings to lists of
    strings, there is at least one source, every source lists at least one
    task input, and every referenced task exists in the workflow.

    Args:
        spec: Workflow specification to validate.

    Returns:
        ``True`` when all checks pass.

    Raises:
        ValueError: If any of the checks above fails.
    """
    mapping_error = "Sources field must be a mapping between strings and lists of strings"
    if not isinstance(spec.sources, dict):
        raise ValueError(mapping_error)
    for source_name, source_ports in spec.sources.items():
        well_formed = (
            isinstance(source_name, str)
            and isinstance(source_ports, list)
            # Elements are checked too: a non-string port would otherwise only
            # fail later, with an AttributeError, when it is split on ".".
            and all(isinstance(port, str) for port in source_ports)
        )
        if not well_formed:
            raise ValueError(mapping_error)

    if not spec.sources:
        raise ValueError(f"There must be at least one source in workflow spec {spec.name}.")

    for source_name, source_ports in spec.sources.items():
        if not source_ports:
            raise ValueError(
                f"Source {source_name} must be associated with at least "
                f"one task input in workflow spec {spec.name}."
            )

    return cls._validate_node_list(
        spec, [port for ports in spec.sources.values() for port in ports], "source"
    )
@classmethod
def _validate_parameter_references(cls, workflow_spec: WorkflowSpec):
    """Validate the mapping between workflow parameters and task parameters.

    Every workflow parameter must be referenced by at least one task
    parameter, and every reference found in the task parameters must name a
    workflow parameter. Task parameter values for which
    ``get_parameter_reference`` yields ``None`` are ignored.

    Args:
        workflow_spec: Workflow specification to validate.

    Raises:
        ValueError: If there are unused workflow parameters and/or references
            to undefined workflow parameters. Both problems are reported in
            a single message.
    """
    # Treat a missing parameters section as "no parameters" instead of crashing.
    workflow_params = workflow_spec.parameters or {}

    param_references = {
        get_parameter_reference(v, task_name)
        for task_name, task in workflow_spec.tasks.items()
        for v in flat_params(task.parameters)
    }
    param_references.discard(None)

    bad_params = [param for param in workflow_params if param not in param_references]
    # Sorted so the error message does not depend on set iteration order.
    bad_references = sorted(
        (ref for ref in param_references if ref not in workflow_params), key=str
    )
    if not (bad_params or bad_references):
        return

    error_msg = []
    for msg, bad_stuff in zip(
        (
            "Workflow parameter{s} {bad_stuff_str} {is_are} not mapped to any task parameters",
            "Task parameters reference undefined workflow parameter{s} {bad_stuff_str}",
        ),
        (bad_params, bad_references),
    ):
        if bad_stuff:
            bad_stuff_str = ", ".join([f"'{i}'" for i in bad_stuff])
            s = "s" if len(bad_stuff) > 1 else ""
            is_are = "are" if len(bad_stuff) > 1 else "is"
            error_msg.append(msg.format(bad_stuff_str=bad_stuff_str, s=s, is_are=is_are))
    raise ValueError(". ".join(error_msg))
class EdgeLabel(NamedTuple):
    """Label attached to a workflow graph edge.

    Equality is the regular tuple equality over all three fields. The hash
    only covers the two port names, so labels that differ only in ``type``
    hash to the same value (they still compare unequal).
    """

    # Name of the output port on the origin node.
    srcport: str
    # Name of the input port on the destination node.
    dstport: str
    # Kind of data flow along the edge (single, parallel, scatter, gather).
    type: EdgeType

    def __hash__(self):
        # Hash the ordered pair of ports. Multiplying the two individual hashes
        # was symmetric (a->b collided with b->a) and collapsed to 0 whenever
        # either port name hashed to 0, e.g. for an empty string.
        return hash((self.srcport, self.dstport))
def _ensure_same_container(
    self, input_type: DataVibeType, ref_type: DataVibeType
) -> DataVibeType:
    """
    Return the base type of `input_type`, wrapped in a `List` exactly when
    `ref_type` is a list type, so the result has the same container-ness as
    the reference.
    """
    element_type = get_base_type(input_type)
    if not is_vibe_list(ref_type):
        # Reference has no container: hand back the bare element type
        return element_type
    return cast(Type[List[BaseVibe]], List[element_type])
def resolve_types(self):
    """
    Resolve the output port types of every node, visiting the groups yielded
    by `topological_sort` in order and each node within a group in turn.
    """
    for level in self.topological_sort():
        for graph_node in level:
            self._resolve_types_for_node(graph_node)
def _find_fan_out_fan_in_edges(self) -> Tuple[Set[Edge[GraphNodeType, EdgeLabel]], ...]:
    """
    Classify edges whose endpoints disagree on being a list type.

    Returns a pair `(fanout, fanin)`: `fanout` holds edges going from a list
    output into a non-list input, `fanin` holds edges going from a non-list
    output into a list input. Edges whose two ends agree are in neither set.
    Raises RuntimeError if an origin port type is still unresolved.
    """
    scatter_edges = set()
    gather_edges = set()
    for candidate in self.edges:
        origin_node, target_node, ports = candidate
        produced = origin_node.spec.output_spec[ports.srcport]
        expected = target_node.spec.inputs_spec[ports.dstport]
        if isinstance(produced, UnresolvedDataVibe):
            raise RuntimeError(
                f"Unresolved type found on edge {candidate}, when finding fan-out/in edges"
            )
        produced_is_list = is_vibe_list(produced)
        expected_is_list = is_vibe_list(expected)
        if produced_is_list == expected_is_list:
            # Same container-ness on both ends: a plain edge
            continue
        if produced_is_list and not expected_is_list:
            scatter_edges.add(candidate)
        elif expected_is_list and not produced_is_list:
            gather_edges.add(candidate)
        else:
            raise RuntimeError(
                f"srctype {produced} and dsttype {expected} are different "
                f"but are not of the expected types List -> DataVibe "
                "or DataVibe -> List"
            )
    return scatter_edges, gather_edges
def propagate_labels(
    self, root: GraphNodeType, parallelism_level: int, op_parallelism: Dict[GraphNodeType, int]
) -> None:
    """Propagate parallelism labels across the graph.

    We update labels according to the parallelism level of previous edges along a path
    (single -> parallel if parallelism_level > 0).

    Our parallelization strategy involves parallelizing ops if *any* of the incoming edges is
    parallel. If there are both parallel and singular edges in the same op, the parallel edges
    distribute items into several instances of the op, while all the data flowing into singular
    edges is replicated as is to all op instances.
    Due to this strategy, we keep track of the maximum parallelism level of all input ports
    in an op, and propagate that into the next level. This means that in some paths the
    algorithm might temporarily assign wrong parallelism levels to edges (even < 0), but they
    will be overwritten to the correct level after the most parallel path is traversed.

    Args:
        root: Node whose outgoing edges are visited; the method recurses into each neighbor.
        parallelism_level: Parallelism level at `root` along the path being traversed.
        op_parallelism: Mapping, updated in place, from each visited node to the maximum
            parallelism level seen so far at its inputs.

    Raises:
        ValueError: If a scatter edge is found while the level is already above zero.
        RuntimeError: If an edge carries a label type that is not handled here.
    """
    for source, neighbor, label in self.edges_from(root):
        # Wrapped only so the edge can be rendered in the error messages below
        edge = WorkflowEdge((source, neighbor, label))
        label_type = label.type
        neighbor_parallelism_level = parallelism_level
        if label_type == EdgeType.parallel:
            # NOTE(review): this returns from the whole call, so the remaining outgoing
            # edges of `root` are not visited in this call. Presumably they were already
            # handled when this edge was made parallel; confirm.
            return
        elif label_type == EdgeType.single:
            # A plain edge downstream of a scatter becomes parallel
            if neighbor_parallelism_level > 0:
                label_type = EdgeType.parallel
        elif label_type == EdgeType.scatter:
            if neighbor_parallelism_level > 0:
                raise ValueError(f"Nested fan-out found at edge {edge} is unsupported")
            neighbor_parallelism_level += 1
        elif label_type == EdgeType.gather:
            # If we are not parallel, gather will just make a list of a single element
            # (the level is clamped at 0 here, so this branch never produces a negative value)
            neighbor_parallelism_level = max(0, neighbor_parallelism_level - 1)
        else:
            raise RuntimeError(f"Found unknown label type in edge {edge}")
        # Keep the highest level seen on any input of the neighbor
        if neighbor in op_parallelism:
            neighbor_parallelism_level = max(
                neighbor_parallelism_level, op_parallelism[neighbor]
            )
        op_parallelism[neighbor] = neighbor_parallelism_level
        # Keep the port names, replace only the edge type
        self.relabel((source, neighbor, label), EdgeLabel(*label[:-1], label_type))
        self.propagate_labels(neighbor, neighbor_parallelism_level, op_parallelism)
def _add_workflow_edge_to_graph(self, origin: str, destination: str) -> None:
    """
    Add a `single` edge to the graph for a pair of "node.port" references.

    Both references are split at their last dot into node name and port name.
    Raises ValueError if a port is missing from the corresponding op spec or
    if a KeyError (e.g. an unknown node name) occurs while connecting.
    """
    src_name, srcport = parse_edge_string(origin, -1)
    dst_name, dstport = parse_edge_string(destination, -1)
    try:
        src_node = self.index[src_name]
        if srcport not in src_node.spec.output_spec:
            raise ValueError(f"Port {srcport} could not be found as output of op {src_name}")
        dst_node = self.index[dst_name]
        if dstport not in dst_node.spec.inputs_spec:
            raise ValueError(f"Port {dstport} could not be found as input of op {dst_name}")
        new_label = EdgeLabel(srcport, dstport, EdgeType.single)
        self.add_edge(src_node, dst_node, new_label)
    except KeyError as e:
        raise ValueError(
            f"Tried to connect port {srcport} from op {src_name} to "
            f"port {dstport} of op {dst_name}, but {str(e)} does "
            "not exist in the workflow graph."
        )
def _update_edge_destinations(
    self, edge: WorkflowSpecEdge, included_workflow_spec: WorkflowSpec, prefix: str
) -> WorkflowSpecEdge:
    """
    Return a copy of `edge` whose destinations pointing at a source of the
    included workflow are replaced by that source's prefixed targets.

    A destination equal to "<prefix>.<source>" expands to one
    "<prefix>.<target>" entry per target of that source; every other
    destination is kept unchanged. The input edge is not modified.
    """
    new_edge = WorkflowSpecEdge(edge.origin, [])
    for destination in edge.destination:
        for inner_source, inner_targets in included_workflow_spec.sources.items():
            if destination == f"{prefix}.{inner_source}":
                new_edge.destination.extend(
                    [f"{prefix}.{inner_target}" for inner_target in inner_targets]
                )
                # A destination can match at most one source
                break
        else:
            # No source matched: keep the destination as it was
            new_edge.destination.append(destination)
    return new_edge
def _validate_all_inputs_connected(self):
    """
    Check that every input port of every indexed node is fed, either by a
    workflow source or by an incoming edge.

    Returns True on success. Raises ValueError if a source or an edge refers
    to an input port that no indexed node declares, or if any declared input
    port is left without a source or edge.
    """
    is_fed = {}
    for task_name, graph_node in self.index.items():
        for input_port in graph_node.spec.inputs_spec:
            is_fed[f"{task_name}.{input_port}"] = False

    def mark_fed(port_key):
        # Flag a port as connected, rejecting ports nobody declared
        if port_key not in is_fed:
            raise ValueError(f"'{port_key}' not in inputs dictionary")
        is_fed[port_key] = True

    for graph_node, source_ports in self.sources.items():
        for source_port in source_ports:
            mark_fed(f"{graph_node.name}.{source_port}")

    for _, target_node, edge_label in self.edges:
        mark_fed(f"{target_node.name}.{edge_label.dstport}")

    unfed: List[str] = [f"'{port_key}'" for port_key, fed in is_fed.items() if not fed]
    if unfed:
        plural = "s" if len(unfed) > 1 else ""
        raise ValueError(
            f"Operation{plural} port{plural} {','.join(unfed)} missing inputs. "
            "All tasks in a workflow must have all their inputs filled"
        )
    return True
def edge_to(self, node: GraphNodeType, port_name: str):
    """Return the single edge that ends at input port `port_name` of `node`.

    The destination is matched by identity (`is`), not equality.

    Args:
        node: Destination node of the edge.
        port_name: Name of the destination port on `node`.

    Returns:
        The matching edge, as yielded by `self.edges`.

    Raises:
        ValueError: If no edge ends at that port.
        RuntimeError: If more than one edge ends at that port.
    """
    matches = []
    for candidate in self.edges:
        _, destination, label = candidate
        if label.dstport == port_name and destination is node:
            matches.append(candidate)

    # Already quoted; do not wrap it in quotes again in the messages below
    port_str = f"'{node.name}.{port_name}'"
    if not matches:
        raise ValueError(f"{port_str} is not a destination of any port")
    if len(matches) > 1:
        # Something went very wrong if we are here
        raise RuntimeError(f"Found multiple edges with {port_str} as destination")
    return matches[0]
Workflow(spec) + + +def load_workflow_by_name( + name: str, + ops_dir: str = DEFAULT_OPS_DIR, + workflow_dir: str = get_workflow_dir(), +) -> Workflow: + """Loads a workflow in the format returned by `list_workflows()`""" + + return Workflow.build( + os.path.join(workflow_dir, f"{name}.yaml"), + ops_base_dir=ops_dir, + workflow_base_dir=workflow_dir, + ) diff --git a/workflows/data_ingestion/admag/admag_seasonal_field.yaml b/workflows/data_ingestion/admag/admag_seasonal_field.yaml new file mode 100644 index 00000000..7f7d7454 --- /dev/null +++ b/workflows/data_ingestion/admag/admag_seasonal_field.yaml @@ -0,0 +1,53 @@ +name: admag_seasonal_field +sources: + admag_input: + - admag_seasonal_field.admag_input +sinks: + seasonal_field: admag_seasonal_field.seasonal_field +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: +tasks: + admag_seasonal_field: + op: admag_seasonal_field + op_dir: admag + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" +description: + short_description: + Generates SeasonalFieldInformation using ADMAg (Microsoft Azure Data + Manager for Agriculture). + long_description: + The workflow creates a DataVibe subclass SeasonalFieldInformation that + contains farm-related operations (e.g., fertilization, harvest, tillage, + planting, crop name). + sources: + admag_input: Unique identifiers for ADMAg seasonal field, and party. + sinks: + seasonal_field: + Crop SeasonalFieldInformation which contains SeasonalFieldInformation that + contains farm-related operations (e.g., fertilization, harvest, tillage, + planting, crop name). + parameters: + base_url: + Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + client_id: + Azure Data Manager for Agriculture client id. 
Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + client_secret: + Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + authority: + Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + default_scope: + Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. diff --git a/workflows/data_ingestion/admag/prescriptions.yaml b/workflows/data_ingestion/admag/prescriptions.yaml new file mode 100644 index 00000000..5acf812b --- /dev/null +++ b/workflows/data_ingestion/admag/prescriptions.yaml @@ -0,0 +1,75 @@ +name: admag_prescritpions +sources: + admag_input: + - list_prescriptions.admag_input + - admag_prescriptions.admag_input +sinks: + response: admag_prescriptions.response +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: +tasks: + list_prescriptions: + op: list_prescriptions + op_dir: admag + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" + get_prescription: + op: get_prescription + op_dir: admag + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" + admag_prescriptions: + op: prescriptions + op_dir: admag + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" +edges: + - origin: list_prescriptions.prescriptions + destination: + - get_prescription.prescription_without_geom_input + - origin: get_prescription.prescription_with_geom + destination: + - 
admag_prescriptions.prescriptions_with_geom_input +description: + short_description: Fetches prescriptions using ADMAg (Microsoft Azure Data Manager for Agriculture). + long_description: + The workflow fetches prescriptions (sensor samples) linked to prescription_map_id. Each sensor sample has + nutrient information (Nitrogen, Carbon, Phosphorus, pH, Latitude, Longitude, etc.). The Latitude & + Longitude are used to create a point geometry. Geometry and nutrient information are transformed to GeoJSON. The GeoJSON + is stored as an asset in farmvibes-ai. + sources: + admag_input: Required inputs to access ADMAg resources, party_id and prescription_map_id that help fetch prescriptions. + sinks: + response: Prescriptions received from ADMAg. + parameters: + base_url: + URL to access the registered app. Refer to this url to create required resources for admag. + https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture + + client_id: + Value that uniquely identifies the registered application in the Microsoft identity platform. Visit url + https://learn.microsoft.com/en-us/azure/data-manager-for-agri/quickstart-install-data-manager-for-agriculture + to register the app. + client_secret: + Sometimes called an application password, a client secret is a string value your app can use in place of a certificate + to identify itself. + authority: + The endpoint URIs for your app are generated automatically when you register or configure your app.
It is used by + client to obtain authorization from the resource owner + default_scope: URL for default azure OAuth2 permissions diff --git a/workflows/data_ingestion/airbus/airbus_download.yaml b/workflows/data_ingestion/airbus/airbus_download.yaml new file mode 100644 index 00000000..14da8a3c --- /dev/null +++ b/workflows/data_ingestion/airbus/airbus_download.yaml @@ -0,0 +1,33 @@ +name: airbus_download +sources: + user_input: + - list.input_item +sinks: + raster: download.downloaded_products +parameters: + api_key: +tasks: + list: + op: list_airbus_products + parameters: + api_key: "@from(api_key)" + download: + op: download_airbus + parameters: + api_key: "@from(api_key)" +edges: + - origin: list.airbus_products + destination: + - download.airbus_products +description: + short_description: Downloads available AirBus imagery for the input geometry and time range. + long_description: + The workflow will check available imagery, using the AirBus API, that contains the input + geometry and inside the input time range. Matching images will be purchased (if they are not + already in the user's library) and downloaded. This workflow requires an AirBus API key. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: AirBus raster. + parameters: + api_key: AirBus API key. Required to run the workflow. 
diff --git a/workflows/data_ingestion/airbus/airbus_price.yaml b/workflows/data_ingestion/airbus/airbus_price.yaml new file mode 100644 index 00000000..ea2162ce --- /dev/null +++ b/workflows/data_ingestion/airbus/airbus_price.yaml @@ -0,0 +1,34 @@ +name: airbus_price +sources: + user_input: + - list.input_item +sinks: + price: price.products_price +parameters: + api_key: +tasks: + list: + op: list_airbus_products + parameters: + api_key: "@from(api_key)" + price: + op: price_airbus_products + parameters: + api_key: "@from(api_key)" +edges: + - origin: list.airbus_products + destination: + - price.airbus_products +description: + short_description: Prices available AirBus imagery for the input geometry and time range. + long_description: + The workflow will check available imagery, using the AirBus API, that contains the input + geometry inside the input time range. The aggregate price (in kB) for matching images will be + computed, discounting images already in the user's library. This workflow requires an AirBus API + key. + sources: + user_input: Time range and geometry of interest. + sinks: + price: Price for all matching imagery. + parameters: + api_key: AirBus API key. Required to run the workflow. diff --git a/workflows/data_ingestion/alos/alos_forest_extent_download.yaml b/workflows/data_ingestion/alos/alos_forest_extent_download.yaml new file mode 100644 index 00000000..2035be5a --- /dev/null +++ b/workflows/data_ingestion/alos/alos_forest_extent_download.yaml @@ -0,0 +1,29 @@ +name: alos_forest_extent_download +sources: + user_input: + - list.input_data +sinks: + downloaded_product: download.raster +parameters: + pc_key: +tasks: + list: + op: list_alos_products + download: + op: download_alos + parameters: + pc_key: "@from(pc_key)" +edges: + - origin: list.alos_products + destination: + - download.product +description: + short_description: Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map. 
+ long_description: + The workflow lists all ALOS forest/non-forest classification products that intersect with the input + geometry and time range (available range 2015-2020), then downloads the data for + each of them. The data will be returned in the form of rasters. + sources: + user_input: Geometry of interest for which to download the ALOS forest/non-forest classification map. + sinks: + downloaded_product: Downloaded ALOS forest/non-forest classification map. \ No newline at end of file diff --git a/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml b/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml new file mode 100644 index 00000000..ccb94db1 --- /dev/null +++ b/workflows/data_ingestion/alos/alos_forest_extent_download_merge.yaml @@ -0,0 +1,41 @@ +name: alos_forest_extent_download_merge +sources: + user_input: + - alos_forest_extent_download.user_input +sinks: + merged_raster: merge.raster + categorical_raster: alos_forest_extent_download.downloaded_product +parameters: + pc_key: +tasks: + alos_forest_extent_download: + workflow: data_ingestion/alos/alos_forest_extent_download + parameters: + pc_key: "@from(pc_key)" + group_rasters_by_time: + op: group_rasters_by_time + parameters: + criterion: "year" + merge: + op: merge_rasters +edges: + - origin: alos_forest_extent_download.downloaded_product + destination: + - group_rasters_by_time.rasters + - origin: group_rasters_by_time.raster_groups + destination: + - merge.raster_sequence +description: + short_description: + Downloads Advanced Land Observing Satellite (ALOS) forest/non-forest classification map and merges it into a single raster. + long_description: + The workflow lists the ALOS forest/non-forest classification products that intersect with the input + geometry and time range (available range 2015-2020), and downloads the filtered products. The + workflow processes the downloaded products and merges them into a single raster.
+ sources: + user_input: Geometry of interest for which to download the ALOS forest/non-forest classification map. + sinks: + merged_raster: ALOS forest/non-forest classification products converted to raster and merged. + categorical_raster: ALOS forest/non-forest classification products that intersect with the input geometry & time range. + parameters: + pc_key: Planetary computer API key. \ No newline at end of file diff --git a/workflows/data_ingestion/bing/basemap_download.yaml b/workflows/data_ingestion/bing/basemap_download.yaml new file mode 100644 index 00000000..f4d04c4f --- /dev/null +++ b/workflows/data_ingestion/bing/basemap_download.yaml @@ -0,0 +1,35 @@ +name: basemap_download +sources: + input_geometry: + - list.user_input +sinks: + basemaps: download.basemap +parameters: + api_key: + zoom_level: +tasks: + list: + op: list_bing_maps + parameters: + api_key: "@from(api_key)" + zoom_level: "@from(zoom_level)" + download: + op: download_bing_basemap + parameters: + api_key: "@from(api_key)" +edges: + - origin: list.products + destination: + - download.input_product +description: + short_description: + Downloads Bing Maps basemaps. + long_description: + The workflow will list all tiles intersecting with the input geometry for a given zoom level + and download a basemap for each of them using Bing Maps API. The basemap tiles will be returned + as individual rasters. + sources: + input_geometry: Geometry of interest for which to download the basemap tiles. + sinks: + basemaps: Downloaded basemaps. 
+ diff --git a/workflows/data_ingestion/bing/basemap_download_merge.yaml b/workflows/data_ingestion/bing/basemap_download_merge.yaml new file mode 100644 index 00000000..84a92fc8 --- /dev/null +++ b/workflows/data_ingestion/bing/basemap_download_merge.yaml @@ -0,0 +1,41 @@ +name: basemap_download_merge +sources: + input_geometry: + - basemap_download.input_geometry +sinks: + merged_basemap: merge.raster +parameters: + api_key: + zoom_level: + merge_resolution: highest +tasks: + basemap_download: + workflow: data_ingestion/bing/basemap_download + parameters: + api_key: "@from(api_key)" + zoom_level: "@from(zoom_level)" + to_sequence: + op: list_to_sequence + merge: + op: merge_rasters + parameters: + resolution: "@from(merge_resolution)" +edges: + - origin: basemap_download.basemaps + destination: + - to_sequence.list_rasters + - origin: to_sequence.rasters_seq + destination: + - merge.raster_sequence +description: + short_description: + Downloads Bing Maps basemap tiles and merges them into a single raster. + long_description: + The workflow will list all tiles intersecting with the input geometry for a given zoom level, + and download a basemap for each of them using Bing Maps API. The basemaps will be merged into + a single raster with the union of the geometries of all tiles. + sources: + input_geometry: Geometry of interest for which to download the basemap tiles. + sinks: + merged_basemap: Merged basemap raster. 
+ diff --git a/workflows/data_ingestion/cdl/download_cdl.yaml b/workflows/data_ingestion/cdl/download_cdl.yaml new file mode 100644 index 00000000..e04fd1e3 --- /dev/null +++ b/workflows/data_ingestion/cdl/download_cdl.yaml @@ -0,0 +1,26 @@ +name: download_cdl +sources: + user_input: + - list_cdl.input_item +sinks: + raster: download_cdl.cdl_raster +tasks: + list_cdl: + op: list_cdl_products + download_cdl: + op: download_cdl + op_dir: download_cdl_data +edges: + - origin: list_cdl.cdl_products + destination: + - download_cdl.input_product +description: + short_description: Downloads crop classes maps in the continental USA for the input time range. + long_description: + The workflow will download crop-specific land cover maps from the USDA Cropland Data Layer, + available for the continental United States. The input geometry must intersect with the coverage + area. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: CDL land cover raster. diff --git a/workflows/data_ingestion/dem/download_dem.yaml b/workflows/data_ingestion/dem/download_dem.yaml new file mode 100644 index 00000000..083198bc --- /dev/null +++ b/workflows/data_ingestion/dem/download_dem.yaml @@ -0,0 +1,39 @@ +name: download_dem +sources: + user_input: + - list.input_items +sinks: + raster: download.downloaded_product +parameters: + pc_key: + resolution: 10 + provider: "USGS3DEP" +tasks: + list: + op: list_dem_products + parameters: + resolution: "@from(resolution)" + provider: "@from(provider)" + download: + op: download_dem + parameters: + api_key: "@from(pc_key)" +edges: + - origin: list.dem_products + destination: + - download.input_product +description: + short_description: Downloads digital elevation map tiles that intersect with the input geometry and time range. 
+ long_description: + The workflow will download digital elevation maps from the USGS 3DEP datasets (available + for the United States at 10 and 30 meters) or Copernicus DEM GLO-30 (globally at 30 meters) + through the Planetary Computer. For more information, see https://planetarycomputer.microsoft.com/dataset/3dep-seamless + and https://planetarycomputer.microsoft.com/dataset/cop-dem-glo-30 . + sources: + user_input: Time range and geometry of interest. + sinks: + raster: DEM raster. + parameters: + pc_key: Optional Planetary Computer API key. + resolution: Spatial resolution of the DEM. 10m and 30m are available. + provider: Provider of the DEM. "USGS3DEP" and "CopernicusDEM30" are available. diff --git a/workflows/data_ingestion/gedi/download_gedi.yaml b/workflows/data_ingestion/gedi/download_gedi.yaml new file mode 100644 index 00000000..d11dbd08 --- /dev/null +++ b/workflows/data_ingestion/gedi/download_gedi.yaml @@ -0,0 +1,35 @@ +name: download_gedi +sources: + user_input: + - list.input_data +sinks: + product: download.downloaded_product +parameters: + earthdata_token: + processing_level: +tasks: + list: + op: list_gedi_products + parameters: + processing_level: "@from(processing_level)" + download: + op: download_gedi_product + parameters: + token: "@from(earthdata_token)" +edges: + - origin: list.gedi_products + destination: + - download.gedi_product +description: + short_description: Downloads GEDI products for the input region and time range. + long_description: + The workflow downloads Global Ecosystem Dynamics Investigation (GEDI) products at the desired + processing level using NASA's EarthData API. This workflow requires an EarthData API token. + sources: + user_input: Time range and geometry of interest. + sinks: + product: GEDI products. + parameters: + earthdata_token: API token for the EarthData platform. Required to run the workflow. + processing_level: + GEDI product processing level. One of 'GEDI01_B.002', 'GEDI02_A.002', 'GEDI02_B.002'. 
diff --git a/workflows/data_ingestion/gedi/download_gedi_rh100.yaml b/workflows/data_ingestion/gedi/download_gedi_rh100.yaml new file mode 100644 index 00000000..260a221f --- /dev/null +++ b/workflows/data_ingestion/gedi/download_gedi_rh100.yaml @@ -0,0 +1,35 @@ +name: download_gedi_rh100 +sources: + user_input: + - download.user_input + - extract.roi +sinks: + rh100: extract.rh100 +parameters: + earthdata_token: + check_quality: +tasks: + download: + workflow: data_ingestion/gedi/download_gedi + parameters: + earthdata_token: "@from(earthdata_token)" + extract: + op: extract_gedi_rh100 + parameters: + check_quality: "@from(check_quality)" +edges: + - origin: download.product + destination: + - extract.gedi_product +description: + short_description: Downloads L2B GEDI products and extracts RH100 variables. + long_description: + The workflow will download the products for the input region and time range, and then extract + RH100 variables for each of the beam shots. Each value is geolocated according to the lowest + mode latitude and longitude values. + sources: + user_input: Time range and geometry of interest. + sinks: + rh100: Points in EPSG:4326 with their associated RH100 values. + parameters: + check_quality: Whether to filter points according to the quality flag. 
diff --git a/workflows/data_ingestion/glad/glad_forest_extent_download.yaml b/workflows/data_ingestion/glad/glad_forest_extent_download.yaml new file mode 100644 index 00000000..5d36f8ef --- /dev/null +++ b/workflows/data_ingestion/glad/glad_forest_extent_download.yaml @@ -0,0 +1,27 @@ +name: glad_forest_extent_download +sources: + input_item: + - list.input_item +sinks: + downloaded_product: download.downloaded_product +parameters: +tasks: + list: + op: list_glad_products + download: + op: download_glad + op_dir: download_glad_data +edges: + - origin: list.glad_products + destination: + - download.glad_product +description: + short_description: + Downloads Global Land Analysis (GLAD) forest extent data. + long_description: + The workflow will list all GLAD forest extent products that intersect with the input geometry + and download the data for each of them. The data will be returned as rasters. + sources: + input_item: Geometry of interest for which to download the GLAD forest extent data. + sinks: + downloaded_product: Downloaded GLAD forest extent product. 
diff --git a/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml b/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml new file mode 100644 index 00000000..da52f9d8 --- /dev/null +++ b/workflows/data_ingestion/glad/glad_forest_extent_download_merge.yaml @@ -0,0 +1,41 @@ +name: glad_forest_extent_download_merge +sources: + input_item: + - glad_forest_extent_download.input_item +parameters: +sinks: + merged_product: merge.raster + categorical_raster: glad_forest_extent_download.downloaded_product +tasks: + glad_forest_extent_download: + workflow: data_ingestion/glad/glad_forest_extent_download + group_rasters_by_time: + op: group_rasters_by_time + parameters: + criterion: "year" + merge: + op: merge_rasters +edges: + - origin: glad_forest_extent_download.downloaded_product + destination: + - group_rasters_by_time.rasters + - origin: group_rasters_by_time.raster_groups + destination: + - merge.raster_sequence +description: + short_description: + Downloads the tiles from Global Land Analysis (GLAD) forest data that intersect with the user input geometry and time range, and merges them into a single raster. + long_description: + The workflow lists the GLAD forest products that intersect with the input + geometry and time range, and downloads the filtered products. The downloaded + products are merged into a single raster and classified. The result tiles have + pixel values categorized into two classes - 0 (non-forest) and 1 (forest). + This workflow uses the same forest definition as the Food and Agriculture + Organization of the United Nations (FAO). + sources: + input_item: Geometry of interest for which to download the GLAD forest extent data. + sinks: + merged_product: Merged GLAD forest extent product to geometry of interest. + categorical_raster: Raster with the GLAD forest extent data. 
+ + \ No newline at end of file diff --git a/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml b/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml new file mode 100644 index 00000000..3f6884f7 --- /dev/null +++ b/workflows/data_ingestion/gnatsgo/download_gnatsgo.yaml @@ -0,0 +1,79 @@ +name: download_gnatsgo +sources: + user_input: + - list.input_item +sinks: + raster: download.downloaded_raster +parameters: + pc_key: + variable: soc0_5 +tasks: + list: + op: list_gnatsgo_products + download: + op: download_gnatsgo + parameters: + api_key: "@from(pc_key)" + variable: "@from(variable)" +edges: + - origin: list.gnatsgo_products + destination: + - download.gnatsgo_product +description: + short_description: + Downloads gNATSGO raster data that intersect with the input geometry and time range. + long_description: + This workflow lists and downloads raster products of the gNATSGO dataset from Planetary Computer. + Input geometry must fall within the continental USA, whereas input time range can be arbitrary (all + gNATSGO assets are from 2020-07-01). For more information on the available properties, see + https://planetarycomputer.microsoft.com/dataset/gnatsgo-rasters. + sources: + user_input: Geometry of interest (arbitrary time range). + sinks: + raster: Raster with desired property. + parameters: + pc_key: Optional Planetary Computer API key. + variable: >- + Options are: + aws{DEPTH} - Available water storage estimate (AWS) for the DEPTH zone. + soc{DEPTH} - Soil organic carbon stock estimate (SOC) for the DEPTH zone. + tk{DEPTH}a - Thickness of soil components used in the DEPTH zone for the AWS calculation. + tk{DEPTH}s - Thickness of soil components used in the DEPTH zone for the SOC calculation. + mukey - Map unit key, a unique identifier of a record for matching with gNATSGO tables. + droughty - Drought vulnerability estimate.
+ nccpi3all - National Commodity Crop Productivity Index that has the highest value among Corn + and Soybeans, Small Grains, or Cotton for major earthy components. + nccpi3corn - National Commodity Crop Productivity Index for Corn for major earthy + components. + nccpi3cot - National Commodity Crop Productivity Index for Cotton for major earthy + components. + nccpi3sg - National Commodity Crop Productivity Index for Small Grains for major earthy + components. + nccpi3soy - National Commodity Crop Productivity Index for Soy for major earthy components. + pctearthmc - National Commodity Crop Productivity Index map unit percent earthy is the map + unit summed comppct_r for major earthy components. + pwsl1pomu - Potential Wetland Soil Landscapes (PWSL). + rootznaws - Root zone (commodity crop) available water storage estimate (RZAWS). + rootznemc - Root zone depth is the depth within the soil profile that commodity crop (cc) + roots can effectively extract water and nutrients for growth. + musumcpct - Sum of the comppct_r (SSURGO component table) values for all listed components + in the map unit. + musumcpcta - Sum of the comppct_r (SSURGO component table) values used in the available + water storage calculation for the map unit. + musumcpcts - Sum of the comppct_r (SSURGO component table) values used in the soil organic + carbon calculation for the map unit. + + gNATSGO has properties available for multiple soil + depths. 
You may exchange DEPTH in the variable names above for any of the following (all + measured in cm): + 0_5 + 0_20 + 0_30 + 5_20 + 0_100 + 0_150 + 0_999 + 20_50 + 50_100 + 100_150 + 150_999 diff --git a/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml b/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml new file mode 100644 index 00000000..b682d005 --- /dev/null +++ b/workflows/data_ingestion/hansen/hansen_forest_change_download.yaml @@ -0,0 +1,57 @@ +name: glad_forest_change_download +sources: + input_item: + - list.input_item +sinks: + merged_raster: merge.raster + downloaded_raster: download.raster +parameters: + layer_name: + tiles_folder_url: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/ +tasks: + list: + op: list_hansen_products + parameters: + tiles_folder_url: "@from(tiles_folder_url)" + layer_name: "@from(layer_name)" + download: + op: download_hansen + group: + op: group_rasters_by_time + parameters: + criterion: "year" + merge: + op: merge_rasters +edges: + - origin: list.hansen_products + destination: + - download.hansen_product + - origin: download.raster + destination: + - group.rasters + - origin: group.raster_groups + destination: + - merge.raster_sequence +description: + short_description: Downloads and merges Global Forest Change (Hansen) rasters that intersect the user-provided geometry/time range. + long_description: + The workflow lists Global Forest Change (Hansen) products that intersect the + user-provided geometry/time range, downloads the data for each of them, and + merges the rasters. The dataset is available at 30m resolution and is + updated annually. The data contains information on forest cover, loss, and + gain. The default dataset version is GFC-2022-v1.10 and is passed to the + workflow as the parameter tiles_folder_url. For the default version, the + dataset is available from 2000 to 2022. 
Dataset details can be found at + https://storage.googleapis.com/earthenginepartners-hansen/GFC-2022-v1.10/download.html. + sources: + input_item: User-provided geometry and time range. + sinks: + merged_raster: Merged Global Forest Change (Hansen) data as a raster. + downloaded_raster: Individual Global Forest Change (Hansen) rasters prior to the merge operation. + parameters: + tiles_folder_url: + URL to the Global Forest Change (Hansen) dataset. It specifies the dataset + version and is used to download the data. + layer_name: + Name of the Global Forest Change (Hansen) layer. Can be any of the following names + 'treecover2000', 'loss', 'gain', 'lossyear', 'datamask', 'first', 'last'. \ No newline at end of file diff --git a/workflows/data_ingestion/landsat/preprocess_landsat.yaml b/workflows/data_ingestion/landsat/preprocess_landsat.yaml new file mode 100644 index 00000000..303f96f3 --- /dev/null +++ b/workflows/data_ingestion/landsat/preprocess_landsat.yaml @@ -0,0 +1,43 @@ +name: preprocess_landsat +sources: + user_input: + - list.input_item +sinks: + raster: stack.landsat_raster +parameters: + pc_key: + qa_mask_value: 64 +tasks: + list: + op: list_landsat_products_pc + download: + op: download_landsat_from_pc + parameters: + api_key: "@from(pc_key)" + stack: + op: stack_landsat + parameters: + qa_mask_value: "@from(qa_mask_value)" +edges: + - origin: list.landsat_products + destination: + - download.landsat_product + - origin: download.downloaded_product + destination: + - stack.landsat_product +description: + short_description: + Downloads and preprocesses LANDSAT tiles that intersect with the input geometry and time range. + long_description: + The workflow will download the tile bands from the Planetary Computer and stack them into a + single raster at 30m resolution. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: LANDSAT rasters at 30m resolution. + parameters: + pc_key: Optional Planetary Computer API key. 
+ qa_mask_value: + Bitmap for which pixel to be included. See documentation for each bit in + https://www.usgs.gov/media/images/landsat-collection-2-pixel-quality-assessment-bit-index + For example, the default value 64 (i.e. 1<<6 ) corresponds to "Clear" pixels diff --git a/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml b/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml new file mode 100644 index 00000000..bdda6ae0 --- /dev/null +++ b/workflows/data_ingestion/modis/download_modis_surface_reflectance.yaml @@ -0,0 +1,41 @@ +name: download_modis_surface_reflectance +sources: + user_input: + - list.input_data +sinks: + raster: download.raster +parameters: + pc_key: + resolution_m: +tasks: + list: + op: list_modis_sr + parameters: + resolution: "@from(resolution_m)" + download: + op: download_modis_sr + parameters: + pc_key: "@from(pc_key)" +edges: + - origin: list.modis_products + destination: + - download.product +description: + short_description: + Downloads MODIS 8-day surface reflectance rasters that intersect with the input geometry and + time range. + long_description: + The workflow will download MODIS raster images either at 250m or 500m resolution. The products + are available at a 8-day interval and pixel values are selected based on low clouds, low view + angle, and highest index value. Notice that only bands 1, 2 and quality control are available + on 250m. + For more information, see + https://planetarycomputer.microsoft.com/dataset/modis-09Q1-061 + https://planetarycomputer.microsoft.com/dataset/modis-09A1-061 + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Products containing MODIS reflectance bands and data. + parameters: + pc_key: Optional Planetary Computer API key. + resolution_m: Product resolution, in meters. Either 250 or 500. 
diff --git a/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml b/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml new file mode 100644 index 00000000..cb513533 --- /dev/null +++ b/workflows/data_ingestion/modis/download_modis_vegetation_index.yaml @@ -0,0 +1,42 @@ +name: download_modis_vegetation_index +sources: + user_input: + - list.input_data +sinks: + index: download.index +parameters: + index: + pc_key: + resolution_m: +tasks: + list: + op: list_modis_vegetation + parameters: + resolution: "@from(resolution_m)" + download: + op: download_modis_vegetation + parameters: + pc_key: "@from(pc_key)" + index: "@from(index)" +edges: + - origin: list.modis_products + destination: + - download.product +description: + short_description: + Downloads MODIS 16-day vegetation index products that intersect with the input geometry and time + range. + long_description: + The workflow will download products at the chosen index and resolution. The products are + available at a 16-day interval and pixel values are selected based on low clouds, low view + angle, and highest index value. Vegetation index values range from (-2000 to 10000). + For more information, see https://planetarycomputer.microsoft.com/dataset/modis-13Q1-061 + and https://lpdaac.usgs.gov/products/mod13a1v061/ . + sources: + user_input: Time range and geometry of interest. + sinks: + index: Products containing the chosen index at the chosen resolution. + parameters: + index: Vegetation index that should be downloaded. Either 'evi' or 'ndvi'. + pc_key: Optional Planetary Computer API key. + resolution_m: Product resolution, in meters. Either 250 or 500. 
diff --git a/workflows/data_ingestion/naip/download_naip.yaml b/workflows/data_ingestion/naip/download_naip.yaml new file mode 100644 index 00000000..cafaf1b1 --- /dev/null +++ b/workflows/data_ingestion/naip/download_naip.yaml @@ -0,0 +1,28 @@ +name: download_naip +sources: + user_input: + - list.input_item +sinks: + raster: download.downloaded_product +parameters: + pc_key: +tasks: + list: + op: list_naip_products + download: + op: download_naip + parameters: + api_key: "@from(pc_key)" +edges: + - origin: list.naip_products + destination: + - download.input_product +description: + short_description: Downloads NAIP tiles that intersect with the input geometry and time range. + long_description: + sources: + user_input: Time range and geometry of interest. + sinks: + raster: NAIP tiles. + parameters: + pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/osm_road_geometries.yaml b/workflows/data_ingestion/osm_road_geometries.yaml new file mode 100644 index 00000000..b2f97f8b --- /dev/null +++ b/workflows/data_ingestion/osm_road_geometries.yaml @@ -0,0 +1,37 @@ +name: osm_road_geometries +sources: + user_input: + - download.input_region +sinks: + roads: download.roads +parameters: + network_type: + buffer_size: +tasks: + download: + op: download_road_geometries + parameters: + network_type: "@from(network_type)" + buffer_size: "@from(buffer_size)" +description: + short_description: Downloads road geometry for input region from Open Street Maps. + long_description: + The workflow downloads information from Open Street Maps for the target region and generates + geometries for roads that intercept the input region bounding box. + sources: + user_input: List of external references. + sinks: + roads: Geometry collection with road geometries that intercept the input region bounding box. + parameters: + network_type: >- + Type of roads that will be selected. One of: + - 'drive_service': get drivable streets, including service roads. 
+ - 'walk': get all streets and paths that pedestrians can use (this network type ignores + one-way directionality). + - 'bike': get all streets and paths that cyclists can use. + - 'all': download all non-private OSM streets and paths (this is the default network type + unless you specify a different one). + - 'all_private': download all OSM streets and paths, including private-access ones. + - 'drive': get drivable public streets (but not service roads). + For more information see https://osmnx.readthedocs.io/en/stable/index.html. + buffer_size: Size of buffer, in meters, to search for nodes in OSM. diff --git a/workflows/data_ingestion/sentinel1/preprocess_s1.yaml b/workflows/data_ingestion/sentinel1/preprocess_s1.yaml new file mode 100644 index 00000000..fa718d17 --- /dev/null +++ b/workflows/data_ingestion/sentinel1/preprocess_s1.yaml @@ -0,0 +1,75 @@ +name: preprocess_s1_rtc +sources: + user_input: + - merge_geom_tr.time_range + s2_products: + - union.items + - filter.bounds_items + - tile.sentinel2_products +sinks: + raster: merge.merged_product +parameters: + pc_key: + min_cover: .4 + dl_timeout: +tasks: + union: + op: merge_geometries + merge_geom_tr: + op: merge_geometry_and_time_range + list: + op: list_sentinel1_products_pc + op_dir: list_sentinel1_products + filter: + op: select_necessary_coverage_items + parameters: + min_cover: "@from(min_cover)" + group_attribute: orbit_number + download: + op: download_sentinel1 + parameters: + api_key: "@from(pc_key)" + timeout_s: "@from(dl_timeout)" + tile: + op: tile_sentinel1_rtc + op_dir: tile_sentinel1 + group: + op: group_sentinel1_orbits + merge: + op: merge_sentinel1_orbits +edges: + - origin: union.merged + destination: + - merge_geom_tr.geometry + - origin: merge_geom_tr.merged + destination: + - list.input_item + - origin: list.sentinel_products + destination: + - filter.items + - origin: filter.filtered_items + destination: + - download.sentinel_product + - origin: download.downloaded_product + 
destination: + - tile.sentinel1_products + - origin: tile.tiled_products + destination: + - group.rasters + - origin: group.raster_groups + destination: + - merge.raster_group +description: + short_description: + Downloads and preprocesses tiles of Sentinel-1 imagery that intersect with the input Sentinel-2 + products in the input time range. + long_description: + The workflow fetches Sentinel-1 tiles that intersect with the Sentinel-2 products, downloads + and preprocesses them, and produces Sentinel-1 rasters in the Sentinel-2 tiling system. + sources: + user_input: Time range of interest. + s2_products: Sentinel-2 products whose geometries are used to select Sentinel-1 tiles. + sinks: + raster: Sentinel-1 rasters in the Sentinel-2 tiling system. + parameters: + pc_key: Planetary Computer API key. diff --git a/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml b/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml new file mode 100644 index 00000000..004e34bc --- /dev/null +++ b/workflows/data_ingestion/sentinel2/cloud_ensemble.yaml @@ -0,0 +1,60 @@ +name: cloud_ensemble +sources: + sentinel_raster: + - cloud1.sentinel_raster + - cloud2.sentinel_raster + - cloud3.sentinel_raster + - cloud4.sentinel_raster + - cloud5.sentinel_raster +sinks: + cloud_probability: ensemble.cloud_probability +tasks: + cloud1: + op: compute_cloud_prob + parameters: + model_path: cloud_model1_cpu.onnx + cloud2: + op: compute_cloud_prob + parameters: + model_path: cloud_model2_cpu.onnx + cloud3: + op: compute_cloud_prob + parameters: + model_path: cloud_model3_cpu.onnx + cloud4: + op: compute_cloud_prob + parameters: + model_path: cloud_model4_cpu.onnx + cloud5: + op: compute_cloud_prob + parameters: + model_path: cloud_model5_cpu.onnx + ensemble: + op: ensemble_cloud_prob +edges: + - origin: cloud1.cloud_probability + destination: + - ensemble.cloud1 + - origin: cloud2.cloud_probability + destination: + - ensemble.cloud2 + - origin: cloud3.cloud_probability + destination: + -
ensemble.cloud3 + - origin: cloud4.cloud_probability + destination: + - ensemble.cloud4 + - origin: cloud5.cloud_probability + destination: + - ensemble.cloud5 +description: + short_description: + Computes the cloud probability of a Sentinel-2 L2A raster using an ensemble of five cloud + segmentation models. + long_description: + The workflow computes cloud probabilities for each model independently, and averages them to + obtain a single probability map. + sources: + sentinel_raster: Sentinel-2 L2A raster. + sinks: + cloud_probability: Cloud probability map. diff --git a/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml b/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml new file mode 100644 index 00000000..0a1c8452 --- /dev/null +++ b/workflows/data_ingestion/sentinel2/improve_cloud_mask.yaml @@ -0,0 +1,63 @@ +name: improve_cloud_mask +sources: + s2_raster: + - cloud.sentinel_raster + - shadow.sentinel_raster + product_mask: + - merge.product_mask +sinks: + mask: merge.merged_cloud_mask +parameters: + cloud_thr: + shadow_thr: + in_memory: + cloud_model: + shadow_model: +tasks: + cloud: + op: compute_cloud_prob + parameters: + in_memory: "@from(in_memory)" + model_path: "@from(cloud_model)" + shadow: + op: compute_shadow_prob + parameters: + in_memory: "@from(in_memory)" + model_path: "@from(shadow_model)" + merge: + op: merge_cloud_masks_simple + op_dir: merge_cloud_masks + parameters: + cloud_prob_threshold: "@from(cloud_thr)" + shadow_prob_threshold: "@from(shadow_thr)" +edges: + - origin: cloud.cloud_probability + destination: + - merge.cloud_probability + - origin: shadow.shadow_probability + destination: + - merge.shadow_probability +description: + short_description: + Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by + machine learning segmentation models. 
+ long_description: + This workflow computes cloud and shadow probabilities using segmentation models, thresholds + them, and merges the models' masks with the product mask. + sources: + s2_raster: Sentinel-2 L2A raster. + product_mask: Cloud mask obtained from the product's quality indicators. + sinks: + mask: Improved cloud mask. + parameters: + cloud_thr: Confidence threshold to assign a pixel as cloud. + shadow_thr: Confidence threshold to assign a pixel as shadow. + in_memory: + Whether to load the whole raster in memory when running predictions. Uses more memory + (~4GB/worker) but speeds up inference for fast models. + cloud_model: + ONNX file for the cloud model. Available models are 'cloud_model{idx}_cpu.onnx' with idx ∈ {1, + 2} being FPN-based models, which are more accurate but slower, and idx ∈ {3, 4, 5} being + cheaplab models, which are less accurate but faster. + shadow_model: + ONNX file for the shadow model. 'shadow.onnx' is the only currently available model. diff --git a/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml b/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml new file mode 100644 index 00000000..5460e8e7 --- /dev/null +++ b/workflows/data_ingestion/sentinel2/improve_cloud_mask_ensemble.yaml @@ -0,0 +1,45 @@ +name: improve_cloud_mask_ensemble +sources: + s2_raster: + - cloud.sentinel_raster + - shadow.sentinel_raster + product_mask: + - merge.product_mask +sinks: + mask: merge.merged_cloud_mask +parameters: + cloud_thr: + shadow_thr: +tasks: + cloud: + workflow: data_ingestion/sentinel2/cloud_ensemble + shadow: + op: compute_shadow_prob + merge: + op: merge_cloud_masks_simple + op_dir: merge_cloud_masks + parameters: + cloud_prob_threshold: "@from(cloud_thr)" + shadow_prob_threshold: "@from(shadow_thr)" +edges: + - origin: cloud.cloud_probability + destination: + - merge.cloud_probability + - origin: shadow.shadow_probability + destination: + - merge.shadow_probability +description: + 
short_description: + Improves cloud masks by merging the product cloud mask with cloud and shadow masks computed by + an ensemble of machine learning segmentation models. + long_description: + This workflow computes cloud and shadow probabilities using an ensemble of segmentation models, + thresholds them, and merges the models' masks with the product mask. + sources: + s2_raster: Sentinel-2 L2A raster. + product_mask: Cloud mask obtained from the product's quality indicators. + sinks: + mask: Improved cloud mask. + parameters: + cloud_thr: Confidence threshold to assign a pixel as cloud. + shadow_thr: Confidence threshold to assign a pixel as shadow. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2.yaml new file mode 100644 index 00000000..251a698f --- /dev/null +++ b/workflows/data_ingestion/sentinel2/preprocess_s2.yaml @@ -0,0 +1,66 @@ +name: preprocess_s2 +sources: + user_input: + - list.input_item + - filter.bounds_items +sinks: + raster: merge.output_raster + mask: merge.output_mask +parameters: + min_tile_cover: + max_tiles_per_time: + pc_key: + dl_timeout: +tasks: + list: + op: list_sentinel2_products_pc + op_dir: list_sentinel2_products + filter: + op: select_necessary_coverage_items + parameters: + min_cover: "@from(min_tile_cover)" + max_items: "@from(max_tiles_per_time)" + download: + op: download_stack_sentinel2 + parameters: + api_key: "@from(pc_key)" + timeout_s: "@from(dl_timeout)" + group: + op: group_sentinel2_orbits + merge: + op: merge_sentinel2_orbits +edges: + - origin: list.sentinel_products + destination: + - filter.items + - origin: filter.filtered_items + destination: + - download.sentinel_product + - origin: download.raster + destination: + - group.rasters + - origin: download.cloud + destination: + - group.masks + - origin: group.raster_groups + destination: + - merge.raster_group + - origin: group.mask_groups + destination: + - merge.mask_group +description: +
short_description: + Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range. + long_description: + This workflow selects a minimum set of tiles that covers the input geometry, downloads + Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single + multi-band raster at 10m resolution. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + mask: Cloud mask at 10m resolution from the product's quality indicators. + parameters: + min_tile_cover: Minimum RoI coverage to consider a set of tiles sufficient. + max_tiles_per_time: Maximum number of tiles used to cover the RoI in each date. + pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml new file mode 100644 index 00000000..7c3e6d59 --- /dev/null +++ b/workflows/data_ingestion/sentinel2/preprocess_s2_ensemble_masks.yaml @@ -0,0 +1,46 @@ +name: preprocess_s2_ensemble_masks +sources: + user_input: + - s2.user_input +sinks: + raster: s2.raster + mask: cloud.mask +parameters: + min_tile_cover: + max_tiles_per_time: + cloud_thr: + shadow_thr: + pc_key: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2 + parameters: + min_tile_cover: "@from(min_tile_cover)" + max_tiles_per_time: "@from(max_tiles_per_time)" + pc_key: "@from(pc_key)" + cloud: + workflow: data_ingestion/sentinel2/improve_cloud_mask_ensemble + parameters: + cloud_thr: "@from(cloud_thr)" + shadow_thr: "@from(shadow_thr)" +edges: + - origin: s2.raster + destination: + - cloud.s2_raster + - origin: s2.mask + destination: + - cloud.product_mask +description: + short_description: + Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and + computes improved cloud masks using an ensemble of cloud and shadow 
segmentation models. + long_description: + This workflow selects a minimum set of tiles that covers the input geometry, downloads + Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single + multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask + with cloud and shadow masks computed using an ensemble of cloud and shadow segmentation models. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + mask: Cloud masks at 10m resolution. diff --git a/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml b/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml new file mode 100644 index 00000000..1f8bb126 --- /dev/null +++ b/workflows/data_ingestion/sentinel2/preprocess_s2_improved_masks.yaml @@ -0,0 +1,54 @@ +name: preprocess_s2_improved_masks +sources: + user_input: + - s2.user_input +sinks: + raster: s2.raster + mask: cloud.mask +parameters: + min_tile_cover: + max_tiles_per_time: + cloud_thr: + shadow_thr: + in_memory: + cloud_model: + shadow_model: + pc_key: + dl_timeout: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2 + parameters: + min_tile_cover: "@from(min_tile_cover)" + max_tiles_per_time: "@from(max_tiles_per_time)" + pc_key: "@from(pc_key)" + dl_timeout: "@from(dl_timeout)" + cloud: + workflow: data_ingestion/sentinel2/improve_cloud_mask + parameters: + cloud_thr: "@from(cloud_thr)" + shadow_thr: "@from(shadow_thr)" + in_memory: "@from(in_memory)" + cloud_model: "@from(cloud_model)" + shadow_model: "@from(shadow_model)" +edges: + - origin: s2.raster + destination: + - cloud.s2_raster + - origin: s2.mask + destination: + - cloud.product_mask +description: + short_description: + Downloads and preprocesses Sentinel-2 imagery that covers the input geometry and time range, and + computes improved cloud masks using cloud and shadow segmentation models. 
+ long_description: + This workflow selects a minimum set of tiles that covers the input geometry, downloads + Sentinel-2 imagery for the selected time range, and preprocesses it by generating a single + multi-band raster at 10m resolution. It then improves cloud masks by merging the product mask + with cloud and shadow masks computed using cloud and shadow segmentation models. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Sentinel-2 L2A rasters with all bands resampled to 10m resolution. + mask: Cloud masks at 10m resolution. diff --git a/workflows/data_ingestion/soil/soilgrids.yaml b/workflows/data_ingestion/soil/soilgrids.yaml new file mode 100644 index 00000000..6296b365 --- /dev/null +++ b/workflows/data_ingestion/soil/soilgrids.yaml @@ -0,0 +1,53 @@ +name: soilgrids +sources: + input_item: + - download_soilgrids.input_item +sinks: + downloaded_raster: download_soilgrids.downloaded_raster +parameters: + map: wrb + identifier: MostProbable +tasks: + download_soilgrids: + op: download_soilgrids + parameters: + map: "@from(map)" + identifier: "@from(identifier)" +edges: +description: + short_description: + Downloads digital soil mapping information from SoilGrids for the input geometry. + long_description: >- + The workflow downloads a raster containing the map and identifiers for the input geometry. + SoilGrids is a system for digital soil mapping based on global compilation of soil profile data + and environmental layers. + sources: + input_item: Input geometry. + sinks: + downloaded_raster: Raster with the map and identifiers requested. + parameters: + map: >- + Map to download. 
Options: + - wrb - World Reference Base classes and probabilities + - bdod - Bulk density - kg/dm^3 + - cec - Cation exchange capacity at pH 7 - cmol(c)/kg + - cfvo - Coarse fragments (volumetric) - cm3/100cm3 (vol%) + - clay - Clay content - g/100g (%) + - nitrogen - Nitrogen - g/kg + - phh2o - Soil pH in H2O - pH + - sand - Sand content - g/100g (%) + - silt - Silt content - g/100g (%) + - soc - Soil organic carbon content - g/kg + - ocs - Soil organic carbon stock - kg/m^3 + - ocd - Organic carbon densities - kg/m^3 + identifier: >- + Variable identifier to be downloaded. Depends on map. + - wrb: Acrisols, Albeluvisols, Alisols, Andosols, Arenosols, Calcisols, Cambisols, + Chernozems, Cryosols, Durisols, Ferralsols, Fluvisols, Gleysols, Gypsisols, Histosols, + Kastanozems, Leptosols, Lixisols, Luvisols, MostProbable, Nitisols, Phaeozems, Planosols, + Plinthosols, Podzols, Regosols, Solonchaks, Solonetz, Stagnosols, Umbrisols, Vertisols. + + Other identifiers follow the nomenclature defined in the + [link=https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean]SoilGrids + documentation page: + https://www.isric.org/explore/soilgrids/faq-soilgrids#What_do_the_filename_codes_mean[/]. diff --git a/workflows/data_ingestion/soil/usda.yaml b/workflows/data_ingestion/soil/usda.yaml new file mode 100644 index 00000000..e2e1e51c --- /dev/null +++ b/workflows/data_ingestion/soil/usda.yaml @@ -0,0 +1,29 @@ +name: usda_soils +sources: + input_item: + - datavibe_filter.input_item +sinks: + downloaded_raster: download_usda_soils.downloaded_raster +parameters: + ignore: all +tasks: + datavibe_filter: + op: datavibe_filter + parameters: + filter_out: "@from(ignore)" + download_usda_soils: + op: download_usda_soils +edges: + - origin: datavibe_filter.output_item + destination: + - download_usda_soils.input_item +description: + short_description: Downloads USDA soil classification raster.
+ long_description: + The workflow will download a global raster with USDA soil classes at 1/30 degree resolution. + sources: + input_item: Dummy input. + sinks: + downloaded_raster: Raster with USDA soil classes. + parameters: + ignore: Selection of which field of the input item should be ignored (among "time_range", "geometry", or "all" for both of them). \ No newline at end of file diff --git a/workflows/data_ingestion/spaceeye/spaceeye.yaml b/workflows/data_ingestion/spaceeye/spaceeye.yaml new file mode 100644 index 00000000..0aa2defb --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye.yaml @@ -0,0 +1,56 @@ +name: spaceeye +sources: + user_input: + - preprocess.user_input + - spaceeye.input_data +sinks: + raster: spaceeye.raster +parameters: + duration: + time_overlap: + min_tile_cover: + max_tiles_per_time: + cloud_thr: + shadow_thr: + pc_key: + s2_timeout: +tasks: + preprocess: + workflow: data_ingestion/spaceeye/spaceeye_preprocess + parameters: + min_tile_cover: "@from(min_tile_cover)" + max_tiles_per_time: "@from(max_tiles_per_time)" + cloud_thr: "@from(cloud_thr)" + shadow_thr: "@from(shadow_thr)" + pc_key: "@from(pc_key)" + s2_timeout: "@from(s2_timeout)" + spaceeye: + workflow: data_ingestion/spaceeye/spaceeye_inference + parameters: + duration: "@from(duration)" + time_overlap: "@from(time_overlap)" +edges: + - origin: preprocess.s2_raster + destination: + - spaceeye.s2_rasters + - origin: preprocess.s1_raster + destination: + - spaceeye.s1_rasters + - origin: preprocess.cloud_mask + destination: + - spaceeye.cloud_rasters +description: + short_description: + Runs the SpaceEye cloud removal pipeline, yielding daily cloud-free images for the input + geometry and time range. + long_description: >- + The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time + range, preprocesses them, computes cloud masks, and runs SpaceEye inference in a sliding window + on the retrieved tiles.
This workflow can be reused as a preprocess step in many applications + that require cloud-free Sentinel-2 data. For more information about SpaceEye, read the paper: + https://arxiv.org/abs/2106.08408. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Cloud-free rasters. + parameters: diff --git a/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml b/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml new file mode 100644 index 00000000..cdd4006e --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye_inference.yaml @@ -0,0 +1,81 @@ +name: spaceeye_inference +sources: + input_data: + - group_s1.input_data + - group_s2.input_data + - group_mask.input_data + s1_rasters: + - group_s1.rasters + s2_rasters: + - group_s2.rasters + cloud_rasters: + - group_mask.rasters +sinks: + raster: split.rasters +parameters: + duration: 48 + time_overlap: 0.5 +tasks: + group_s1: + op: group_s1_tile_sequence + op_dir: group_tile_sequence + parameters: + duration: "@from(duration)" + overlap: "@from(time_overlap)" + group_s2: + op: group_s2_tile_sequence + op_dir: group_tile_sequence + parameters: + duration: "@from(duration)" + overlap: "@from(time_overlap)" + group_mask: + op: group_s2cloudmask_tile_sequence + op_dir: group_tile_sequence + parameters: + duration: "@from(duration)" + overlap: "@from(time_overlap)" + spaceeye: + op: remove_clouds + parameters: + duration: "@from(duration)" + split: + op: split_spaceeye_sequence + op_dir: split_sequence +edges: + - origin: group_s1.tile_sequences + destination: + - spaceeye.s1_products + - origin: group_s2.tile_sequences + destination: + - spaceeye.s2_products + - origin: group_mask.tile_sequences + destination: + - spaceeye.cloud_masks + - origin: spaceeye.spaceeye_sequence + destination: + - split.sequences +description: + short_description: + Performs SpaceEye inference to generate daily cloud-free images given Sentinel data and cloud + masks. 
+ long_description: >- + The workflow will group input Sentinel-1, Sentinel-2, and cloud mask rasters into + spatio-temporal windows and perform inference of each window. The windows will then be merged + into rasters for the RoI. More information about SpaceEye available in the paper: + https://arxiv.org/abs/2106.08408. + sources: + input_data: + Time range and region of interest. Will determine the spatio-temporal windows and region for + the output rasters. + s1_rasters: Sentinel-1 rasters tiled to the Sentinel-2 grid. + s2_rasters: Sentinel-2 tile rasters for the input time range. + cloud_rasters: Cloud masks for each of the Sentinel-2 tiles. + sinks: + raster: Cloud-free rasters for the input time range and region of interest. + parameters: + duration: + Time window, in days, considered in the inference. Controls the amount of temporal context for + inpainting clouds. Larger windows require more compute and memory. + time_overlap: + Overlap ratio of each temporal window. Controls the temporal step between windows as a + fraction of the window size. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml b/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml new file mode 100644 index 00000000..1a71dcfd --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye_interpolation.yaml @@ -0,0 +1,62 @@ +name: spaceeye_interpolation +sources: + user_input: + - preprocess.user_input + - spaceeye.input_data +sinks: + raster: spaceeye.raster +parameters: + duration: + time_overlap: + min_tile_cover: + max_tiles_per_time: + cloud_thr: + shadow_thr: + pc_key: +tasks: + preprocess: + workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks + parameters: + min_tile_cover: "@from(min_tile_cover)" + max_tiles_per_time: "@from(max_tiles_per_time)" + cloud_thr: "@from(cloud_thr)" + shadow_thr: "@from(shadow_thr)" + pc_key: "@from(pc_key)" + spaceeye: + workflow: data_ingestion/spaceeye/spaceeye_interpolation_inference + parameters: + duration: "@from(duration)" + time_overlap: "@from(time_overlap)" +edges: + - origin: preprocess.raster + destination: + - spaceeye.s2_rasters + - origin: preprocess.mask + destination: + - spaceeye.cloud_rasters +description: + short_description: + Runs the SpaceEye cloud removal pipeline using an interpolation-based algorithm, yielding daily + cloud-free images for the input geometry and time range. + long_description: >- + The workflow fetches Sentinel-2 tiles that cover the input geometry and time range, preprocesses + them, computes cloud masks, and runs SpaceEye inference in a sliding window on the retrieved + tiles. This workflow can be reused as a preprocess step in many applications that require + cloud-free Sentinel-2 data. For more information about SpaceEye, read the + [link=https://arxiv.org/abs/2106.08408]paper: https://arxiv.org/abs/2106.08408[/link]. + sources: + user_input: Time range and geometry of interest. + sinks: + raster: Cloud-free rasters. + parameters: + duration: + Time window, in days, considered in the inference. 
Controls the amount of temporal context for + inpainting clouds. Larger windows require more compute and memory. + time_overlap: + Overlap ratio of each temporal window. Controls the temporal step between windows as a + fraction of the window size. + min_tile_cover: Minimum RoI coverage to consider a set of tiles sufficient. + max_tiles_per_time: Maximum number of tiles used to cover the RoI in each date. + cloud_thr: Confidence threshold to assign a pixel as cloud. + shadow_thr: Confidence threshold to assign a pixel as shadow. + pc_key: Optional Planetary Computer API key. diff --git a/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml b/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml new file mode 100644 index 00000000..c3aaa9c2 --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye_interpolation_inference.yaml @@ -0,0 +1,68 @@ +name: spaceeye_interpolation_inference +sources: + input_data: + - group_s2.input_data + - group_mask.input_data + s2_rasters: + - group_s2.rasters + cloud_rasters: + - group_mask.rasters +sinks: + raster: split.rasters +parameters: + duration: 48 + time_overlap: 0.5 +tasks: + group_s2: + op: group_s2_tile_sequence + op_dir: group_tile_sequence + parameters: + duration: "@from(duration)" + overlap: "@from(time_overlap)" + group_mask: + op: group_s2cloudmask_tile_sequence + op_dir: group_tile_sequence + parameters: + duration: "@from(duration)" + overlap: "@from(time_overlap)" + spaceeye: + op: remove_clouds_interpolation + op_dir: remove_clouds + parameters: + duration: "@from(duration)" + split: + op: split_spaceeye_sequence + op_dir: split_sequence +edges: + - origin: group_s2.tile_sequences + destination: + - spaceeye.s2_products + - origin: group_mask.tile_sequences + destination: + - spaceeye.cloud_masks + - origin: spaceeye.spaceeye_sequence + destination: + - split.sequences +description: + short_description: + Performs temporal damped interpolation to generate daily cloud-free 
images given Sentinel-2 data + and cloud masks. + long_description: >- + The workflow will group input Sentinel-2 and cloud mask rasters into spatio-temporal windows and + perform inference of each window. The windows will then be merged into rasters for the RoI. More + information about SpaceEye available in the paper: https://arxiv.org/abs/2106.08408. + sources: + input_data: + Time range and region of interest. Will determine the spatio-temporal windows and region for + the output rasters. + s2_rasters: Sentinel-2 tile rasters for the input time range. + cloud_rasters: Cloud masks for each of the Sentinel-2 tiles. + sinks: + raster: Cloud-free rasters for the input time range and region of interest. + parameters: + duration: + Time window, in days, considered in the inference. Controls the amount of temporal context for + inpainting clouds. Larger windows require more compute and memory. + time_overlap: + Overlap ratio of each temporal window. Controls the temporal step between windows as a + fraction of the window size. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml b/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml new file mode 100644 index 00000000..6ea55cfc --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye_preprocess.yaml @@ -0,0 +1,49 @@ +name: spaceeye_preprocess_rtc +sources: + user_input: + - s2.user_input + - s1.user_input +sinks: + s2_raster: s2.raster + s1_raster: s1.raster + cloud_mask: s2.mask +parameters: + min_tile_cover: .4 + max_tiles_per_time: + cloud_thr: + shadow_thr: + pc_key: + s1_timeout: + s2_timeout: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks + parameters: + min_tile_cover: "@from(min_tile_cover)" + max_tiles_per_time: "@from(max_tiles_per_time)" + cloud_thr: "@from(cloud_thr)" + shadow_thr: "@from(shadow_thr)" + pc_key: "@from(pc_key)" + in_memory: true + dl_timeout: "@from(s2_timeout)" + s1: + workflow: data_ingestion/sentinel1/preprocess_s1 + parameters: + pc_key: "@from(pc_key)" + dl_timeout: "@from(s1_timeout)" +edges: + - origin: s2.raster + destination: + - s1.s2_products +description: + short_description: Runs the SpaceEye preprocessing pipeline. + long_description: + The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time + range and preprocesses them. It also computes improved cloud masks using cloud and shadow + segmentation models. + sources: + user_input: Time range and geometry of interest. + sinks: + s2_raster: Sentinel-2 rasters. + s1_raster: Sentinel-1 rasters. + cloud_mask: Cloud and cloud shadow mask. 
diff --git a/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml b/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml new file mode 100644 index 00000000..bb56c7c9 --- /dev/null +++ b/workflows/data_ingestion/spaceeye/spaceeye_preprocess_ensemble.yaml @@ -0,0 +1,39 @@ +name: spaceeye_preprocess_ensemble +sources: + user_input: + - s2.user_input + - s1.user_input +sinks: + s2_raster: s2.raster + s1_raster: s1.raster + cloud_mask: s2.mask +parameters: + pc_key: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2_ensemble_masks + parameters: + pc_key: "@from(pc_key)" + s1: + workflow: data_ingestion/sentinel1/preprocess_s1 + parameters: + pc_key: "@from(pc_key)" +edges: + - origin: s2.raster + destination: + - s1.s2_products +description: + short_description: + Runs the SpaceEye preprocessing pipeline with an ensemble of cloud segmentation models. + long_description: + The workflow fetches both Sentinel-1 and Sentinel-2 tiles that cover the input geometry and time + range and preprocesses them. It also computes improved cloud masks using cloud and shadow + segmentation models. Cloud probabilities are computed with an ensemble of five models. + sources: + user_input: Time range and geometry of interest. + sinks: + s2_raster: Sentinel-2 rasters. + s1_raster: Sentinel-1 rasters. + cloud_mask: Cloud and cloud shadow mask. + parameters: + pc_key: Planetary Computer API key.
diff --git a/workflows/data_ingestion/user_data/ingest_geometry.yaml b/workflows/data_ingestion/user_data/ingest_geometry.yaml new file mode 100644 index 00000000..966a670a --- /dev/null +++ b/workflows/data_ingestion/user_data/ingest_geometry.yaml @@ -0,0 +1,26 @@ +name: ingest_geometry +sources: + user_input: + - unpack.input_refs +sinks: + geometry: download.downloaded +tasks: + unpack: + op: unpack_refs + download: + op: download_geometry_from_ref + op_dir: download_from_ref +edges: + - origin: unpack.ref_list + destination: + - download.input_ref +description: + short_description: + Adds user geometries into the cluster storage, allowing for them to be used on workflows. + long_description: + The workflow downloads geometries provided in the references and generates GeometryCollection + objects with local assets that can be used in other operations. + sources: + user_input: List of external references. + sinks: + geometry: GeometryCollections with downloaded assets. diff --git a/workflows/data_ingestion/user_data/ingest_raster.yaml b/workflows/data_ingestion/user_data/ingest_raster.yaml new file mode 100644 index 00000000..13842bb9 --- /dev/null +++ b/workflows/data_ingestion/user_data/ingest_raster.yaml @@ -0,0 +1,26 @@ +name: ingest_raster +sources: + user_input: + - unpack.input_refs +sinks: + raster: download.downloaded +tasks: + unpack: + op: unpack_refs + download: + op: download_raster_from_ref + op_dir: download_from_ref +edges: + - origin: unpack.ref_list + destination: + - download.input_ref +description: + short_description: + Adds user rasters into the cluster storage, allowing for them to be used on workflows. + long_description: + The workflow downloads rasters provided in the references and generates Raster objects with + local assets that can be used in other operations. + sources: + user_input: List of external references. + sinks: + raster: Rasters with downloaded assets. 
diff --git a/workflows/data_ingestion/user_data/ingest_smb.yaml b/workflows/data_ingestion/user_data/ingest_smb.yaml new file mode 100644 index 00000000..20dffe39 --- /dev/null +++ b/workflows/data_ingestion/user_data/ingest_smb.yaml @@ -0,0 +1,42 @@ +name: ingest_smb +sources: + user_input: + - download.user_input +sinks: + rasters: download.rasters +parameters: + server_name: + server_ip: + server_port: 445 + username: + password: + share_name: + directory_path: "/" + bands: ["red", "green", "blue"] +tasks: + download: + op: download_rasters_from_smb + op_dir: download_from_smb + parameters: + server_name: "@from(server_name)" + server_ip: "@from(server_ip)" + server_port: "@from(server_port)" + username: "@from(username)" + password: "@from(password)" + share_name: "@from(share_name)" + directory_path: "@from(directory_path)" + bands: "@from(bands)" +edges: +description: + short_description: + Adds user rasters into the cluster storage from an SMB share, allowing for them to be + used on workflows. + long_description: + The workflow downloads rasters from the provided SMB share and generates Raster objects with + local assets that can be used in other operations. + sources: + user_input: + DataVibe containing the time range and geometry metadata of the set of rasters + to be downloaded. + sinks: + rasters: Rasters with downloaded assets. 
diff --git a/workflows/data_ingestion/weather/download_chirps.yaml b/workflows/data_ingestion/weather/download_chirps.yaml new file mode 100755 index 00000000..fb0793b9 --- /dev/null +++ b/workflows/data_ingestion/weather/download_chirps.yaml @@ -0,0 +1,32 @@ +name: chirps +sources: + user_input: + - list_chirps.input_item +sinks: + product: download_chirps.downloaded_product +parameters: + freq: daily + res: p05 +tasks: + list_chirps: + op: list_chirps + parameters: + freq: "@from(freq)" + res: "@from(res)" + download_chirps: + op: download_chirps +edges: + - origin: list_chirps.chirps_products + destination: + - download_chirps.chirps_product +description: + short_description: Downloads accumulated precipitation data from the CHIRPS dataset. + long_description: + sources: + user_input: Time range and geometry of interest. + sinks: + product: TIFF file containing accumulated precipitation. + parameters: + freq: daily or monthly frequencies + res: p05 for 0.05 degree resolution or p25 for 0.25 degree resolution, + p25 is only available daily diff --git a/workflows/data_ingestion/weather/download_era5.yaml b/workflows/data_ingestion/weather/download_era5.yaml new file mode 100644 index 00000000..e5f6df16 --- /dev/null +++ b/workflows/data_ingestion/weather/download_era5.yaml @@ -0,0 +1,48 @@ +name: download_era5 +sources: + user_input: + - list.input_item +sinks: + downloaded_product: download.downloaded_product +parameters: + pc_key: + variable: 2t +tasks: + list: + op: list_era5 + parameters: + variable: "@from(variable)" + download: + op: download_era5 + parameters: + api_key: "@from(pc_key)" +edges: + - origin: list.era5_products + destination: + - download.era5_product +description: + short_description: Hourly estimated weather variables. + long_description: + Hourly weather variables obtained from combining observations and numerical model runs to + estimate the state of the atmosphere. + sources: + user_input: Time range and geometry of interest. 
+ sinks: + downloaded_product: 30km resolution weather variables. + parameters: + pc_key: Optional Planetary Computer API key. + variable: >- + Options are: + 2t - 2 meter temperature (default) + 100u - 100 meter U wind component + 100v - 100 meter V wind component + 10u - 10 meter U wind component + 10v - 10 meter V wind component + 2d - 2 meter dewpoint temperature + mn2t - Minimum temperature at 2 meters since previous post-processing + msl - Mean sea level pressure + mx2t - Maximum temperature at 2 meters since previous post-processing + sp - Surface pressure + ssrd - Surface solar radiation downwards + sst - Sea surface temperature + tp - Total precipitation diff --git a/workflows/data_ingestion/weather/download_era5_monthly.yaml b/workflows/data_ingestion/weather/download_era5_monthly.yaml new file mode 100644 index 00000000..b180015a --- /dev/null +++ b/workflows/data_ingestion/weather/download_era5_monthly.yaml @@ -0,0 +1,47 @@ +name: download_era5_monthly +sources: + user_input: + - list.input_item +sinks: + downloaded_product: download.downloaded_product +parameters: + cds_api_key: + variable: 2t +tasks: + list: + op: list_era5_cds + op_dir: list_era5 + parameters: + variable: "@from(variable)" + download: + op: download_era5 + parameters: + api_key: "@from(cds_api_key)" +edges: + - origin: list.era5_products + destination: + - download.era5_product +description: + short_description: Monthly estimated weather variables. + long_description: + Monthly weather variables obtained from combining observations and numerical model runs to + estimate the state of the atmosphere. + sources: + user_input: Time range and geometry of interest. + sinks: + downloaded_product: 30km resolution weather variables. 
+ parameters: + cds_api_key: api key for Copernicus CDS (https://cds.climate.copernicus.eu/user/register) + variable: >- + Options are: + 2t - 2 meter temperature (default) + 100u - 100 meter U wind component + 100v - 100 meter V wind component + 10u - 10 meter U wind component + 10v - 10 meter V wind component + 2d - 2 meter dewpoint temperature + msl - Mean sea level pressure + sp - Surface pressure + ssrd - Surface solar radiation downwards + sst - Sea surface temperature + tp - Total precipitation diff --git a/workflows/data_ingestion/weather/download_gridmet.yaml b/workflows/data_ingestion/weather/download_gridmet.yaml new file mode 100644 index 00000000..ca85319f --- /dev/null +++ b/workflows/data_ingestion/weather/download_gridmet.yaml @@ -0,0 +1,49 @@ +name: download_gridmet +sources: + user_input: + - list.input_item +sinks: + downloaded_product: download.downloaded_product +parameters: + variable: pet +tasks: + list: + op: list_gridmet + op_dir: list_climatology_lab + parameters: + variable: "@from(variable)" + download: + op: download_climatology_lab +edges: + - origin: list.products + destination: + - download.input_product +description: + short_description: Daily surface meteorological properties from GridMET. + long_description: >- + The workflow downloads weather and hydrological data for the input time range. Data is + available for the contiguous US and southern British Columbia surfaces from 1979-present, with a + daily temporal resolution and a ~4-km (1/24th degree) spatial resolution. + sources: + user_input: Time range of interest. + sinks: + downloaded_product: Downloaded variable for each year in the input time range. 
+ parameters: + variable: >- + Options are: + bi - Burning Index + erc - Energy Release Component + etr - Daily reference evapotranspiration (alfalfa, units = mm) + fm100 - Fuel Moisture (100-hr, units = %) + fm1000 - Fuel Moisture (1000-hr, units = %) + pet - Potential evapotranspiration (reference grass evapotranspiration, units = mm) + pr - Precipitation amount (daily total, units = mm) + rmax - Maximum relative humidity (units = %) + rmin - Minimum relative humidity (units = %) + sph - Specific humidity (units = kg/kg) + srad - Downward surface shortwave radiation (units = W/m^2) + th - Wind direction (degrees clockwise from North) + tmmn - Minimum temperature (units = K) + tmmx - Maximum temperature (units = K) + vpd - Vapor Pressure Deficit (units = kPa) + vs - Wind speed at 10m (units = m/s) diff --git a/workflows/data_ingestion/weather/download_herbie.yaml b/workflows/data_ingestion/weather/download_herbie.yaml new file mode 100755 index 00000000..71154bb4 --- /dev/null +++ b/workflows/data_ingestion/weather/download_herbie.yaml @@ -0,0 +1,70 @@ +name: download_herbie +sources: + user_input: + - list_herbie.input_item +sinks: + forecast: download_herbie.forecast +parameters: + model: "hrrr" + product: + frequency: 1 + forecast_lead_times: + forecast_start_date: + search_text: ":TMP:2 m" +tasks: + list_herbie: + op: list_herbie + parameters: + model: "@from(model)" + product: "@from(product)" + frequency: "@from(frequency)" + forecast_lead_times: "@from(forecast_lead_times)" + forecast_start_date: "@from(forecast_start_date)" + search_text: "@from(search_text)" + download_herbie: + op: download_herbie +edges: + - origin: list_herbie.product + destination: + - download_herbie.herbie_product +description: + short_description: Downloads forecast data for provided location & time range using herbie python package. 
+ long_description: + Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model + outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. + NWP data in GRIB2 format can be read with xarray+cfgrib. Model data Herbie can retrieve includes the High + Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), + Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). + sources: + user_input: Time range and geometry of interest. + sinks: + forecast: Grib file with the requested forecast. + parameters: + model: + Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types + 'hrrr' HRRR contiguous United States model + 'hrrrak' HRRR Alaska model (alias 'alaska') + 'rap' RAP model + 'gfs' Global Forecast System (atmosphere) + 'gfs_wave' Global Forecast System (wave) + 'rrfs' Rapid Refresh Forecast System prototype + for more information see https://herbie.readthedocs.io/en/latest/user_guide/model_info.html + product: + Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), + subh (subhourly fields)). Not specifying this will use the first product in model template file. + frequency: frequency in hours of the forecast + forecast_lead_times: + Forecast lead time in the format [start_time, end_time, increment] (in hours). This parameter can + be None, and in this case see parameter 'forecast_start_date' for more details. You cannot specify + 'forecast_lead_times' and 'forecast_start_date' at the same time. + forecast_start_date: + latest datetime (in the format "%Y-%m-%d %H:%M") for which analysis (zero lead time) are retrieved. + After this datetime, forecasts with progressively increasing lead times are retrieved. 
If this parameter + is set to None and 'forecast_lead_times' is also set to None, then the workflow returns analysis + (zero lead time) up to the latest analysis available, and from that point it returns forecasts with + progressively increasing lead times. + search_text: + It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer + of the file required instead of complete file. + For more information on search_text refer to below url. + https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html diff --git a/workflows/data_ingestion/weather/download_terraclimate.yaml b/workflows/data_ingestion/weather/download_terraclimate.yaml new file mode 100644 index 00000000..dcb2bfa3 --- /dev/null +++ b/workflows/data_ingestion/weather/download_terraclimate.yaml @@ -0,0 +1,47 @@ +name: download_terraclimate +sources: + user_input: + - list.input_item +sinks: + downloaded_product: download.downloaded_product +parameters: + variable: tmax +tasks: + list: + op: list_terraclimate + op_dir: list_climatology_lab + parameters: + variable: "@from(variable)" + download: + op: download_climatology_lab +edges: + - origin: list.products + destination: + - download.input_product +description: + short_description: Monthly climate and hydroclimate properties from TerraClimate. + long_description: >- + The workflow downloads weather and hydrological data for the input time range. Data is + available for global terrestrial surfaces from 1958-present, with a monthly temporal resolution + and a ~4-km (1/24th degree) spatial resolution. + sources: + user_input: Time range of interest. + sinks: + downloaded_product: Downloaded variable for each year in the input time range. 
+ parameters: + variable: >- + Options are: + aet - Actual Evapotranspiration (monthly total, units = mm) + def - Climate Water Deficit (monthly total, units = mm) + pet - Potential evapotranspiration (monthly total, units = mm) + ppt - Precipitation (monthly total, units = mm) + q - Runoff (monthly total, units = mm) + soil - Soil Moisture (total column at end of month, units = mm) + srad - Downward surface shortwave radiation (units = W/m2) + swe - Snow water equivalent (at end of month, units = mm) + tmax - Max Temperature (average for month, units = C) + tmin - Min Temperature (average for month, units = C) + vap - Vapor pressure (average for month, units = kPa) + ws - Wind speed (average for month, units = m/s) + vpd - Vapor Pressure Deficit (average for month, units = kPa) + PDSI - Palmer Drought Severity Index (at end of month, units = unitless) diff --git a/workflows/data_ingestion/weather/get_ambient_weather.yaml b/workflows/data_ingestion/weather/get_ambient_weather.yaml new file mode 100644 index 00000000..790f1a92 --- /dev/null +++ b/workflows/data_ingestion/weather/get_ambient_weather.yaml @@ -0,0 +1,37 @@ +name: get_ambient_weather +sources: + user_input: + - get_weather.user_input +sinks: + weather: get_weather.weather +parameters: + api_key: + app_key: + limit: -1 # No limit + feed_interval: +tasks: + get_weather: + op: download_ambient_weather + op_dir: download_ambient_weather + parameters: + api_key: "@from(api_key)" + app_key: "@from(app_key)" + limit: "@from(limit)" + feed_interval: "@from(feed_interval)" +edges: +description: + short_description: Downloads weather data from an Ambient Weather station. + long_description: + The workflow connects to the Ambient Weather REST API and requests data for the input time + range. The input geometry will be used to find a device inside the region. If no devices are + found in the geometry, the workflow will fail. Connection to the API requires an API key and an + App key. 
+ sources: + user_input: Time range and geometry of interest. + sinks: + weather: Weather data from the station. + parameters: + api_key: Ambient Weather API key. + app_key: Ambient Weather App key. + limit: Maximum number of data points. If -1, do not limit. + feed_interval: Interval between samples. Defined by the weather station. diff --git a/workflows/data_ingestion/weather/get_forecast.yaml b/workflows/data_ingestion/weather/get_forecast.yaml new file mode 100644 index 00000000..578d7f92 --- /dev/null +++ b/workflows/data_ingestion/weather/get_forecast.yaml @@ -0,0 +1,45 @@ +name: get_forecast +sources: + user_input: + - preprocessing.user_input +sinks: + forecast: read_forecast.local_forecast +parameters: + noaa_gfs_token: +tasks: + preprocessing: + op: gfs_preprocess + op_dir: gfs_preprocess + parameters: + sas_token: "@from(noaa_gfs_token)" + gfs_download: + op: gfs_download + op_dir: gfs_download + parameters: + sas_token: "@from(noaa_gfs_token)" + read_forecast: + op: read_grib_forecast + op_dir: read_grib_forecast +edges: + - origin: preprocessing.time + destination: + - gfs_download.time + - origin: preprocessing.location + destination: + - read_forecast.location + - origin: gfs_download.global_forecast + destination: + - read_forecast.global_forecast +description: + short_description: + Downloads weather forecast data from NOAA Global Forecast System (GFS) for the input time range. + long_description: + The workflow downloads global forecast data from the Planetary Computer with 13km resolution + between grid points. The workflow requires a SAS token to access the blob storage, which can be + found at https://planetarycomputer.microsoft.com/dataset/storage/noaa-gfs. + sources: + user_input: Time range and geometry of interest. + sinks: + forecast: Weather forecast data. + parameters: + noaa_gfs_token: SAS token to access blob storage. 
diff --git a/workflows/data_ingestion/weather/herbie_forecast.yaml b/workflows/data_ingestion/weather/herbie_forecast.yaml new file mode 100644 index 00000000..4257c1c7 --- /dev/null +++ b/workflows/data_ingestion/weather/herbie_forecast.yaml @@ -0,0 +1,74 @@ +name: forecast_weather +sources: + user_input: + - forecast_range.user_input +sinks: + weather_forecast: forecast_download.weather_forecast + forecast_range: forecast_range.download_period +parameters: + forecast_lead_times: + search_text: + weather_type: + model: + overwrite: + product: +tasks: + forecast_range: + op: forecast_range_split + op_dir: download_herbie + parameters: + forecast_lead_times: "@from(forecast_lead_times)" + weather_type: "@from(weather_type)" + forecast_download: + op: forecast_weather + op_dir: download_herbie + parameters: + model: "@from(model)" + overwrite: "@from(overwrite)" + product: "@from(product)" + forecast_lead_times: "@from(forecast_lead_times)" + search_text: "@from(search_text)" + weather_type: "@from(weather_type)" +edges: + - origin: forecast_range.download_period + destination: + - forecast_download.user_input +description: + short_description: Downloads forecast observations for provided location & time range using herbie python package. + long_description: + Herbie is a python package that downloads recent and archived numerical weather prediction (NWP) model + outputs from different cloud archive sources. Its most popular capability is to download HRRR model data. + NWP data in GRIB2 format can be read with xarray+cfgrib. Model data Herbie can retrieve includes the High + Resolution Rapid Refresh (HRRR), Rapid Refresh (RAP), Global Forecast System (GFS), National Blend of Models (NBM), + Rapid Refresh Forecast System - Prototype (RRFS), and ECMWF open data forecast products (ECMWF). + sources: + user_input: Time range and geometry of interest. + sinks: + weather_forecast: Downloaded Forecast observations, cleaned, interpolated and mapped to each hour. 
+ forecast_range: Time range of forecast observations. + parameters: + model: + Model name as defined in the models template folder. CASE INSENSITIVE Below are examples of model types + 'hrrr' HRRR contiguous United States model + 'hrrrak' HRRR Alaska model (alias 'alaska') + 'rap' RAP model + 'gfs' Global Forecast System (atmosphere) + 'gfs_wave' Global Forecast System (wave) + 'rrfs' Rapid Refresh Forecast System prototype + overwrite: If true, look for GRIB2 file even if local copy exists. If false, use the local copy + product: + Output variable product file type (sfc (surface fields), prs (pressure fields), nat (native fields), + subh (subhourly fields)). Not specifying this will use the first product in model template file. + forecast_lead_times: + Help to define forecast lead time in hours. Accept the input in range format. + Example - (1, 25, 1) + For more information refer below url. + https://blaylockbk.github.io/Herbie/_build/html/reference_guide/_autosummary/herbie.archive.Herbie.html + search_text: + It's a regular expression used to search on GRIB2 Index files and allow you to download just the layer + of the file required instead of complete file. + For more information on search_text refer to below url. + https://blaylockbk.github.io/Herbie/_build/html/user_guide/searchString.html + weather_type: + It's a user preferred text to represent weather parameter type (temperature, humidity, wind_speed etc). + This is used as column name for the output returned by operator. 
diff --git a/workflows/data_processing/chunk_onnx/chunk_onnx.yaml b/workflows/data_processing/chunk_onnx/chunk_onnx.yaml new file mode 100644 index 00000000..4d30c2ef --- /dev/null +++ b/workflows/data_processing/chunk_onnx/chunk_onnx.yaml @@ -0,0 +1,55 @@ +name: chunk_onnx +sources: + rasters: + - chunk_raster.rasters + - list_to_sequence.list_rasters +sinks: + raster: combine_chunks.raster +parameters: + model_file: + step: 100 +tasks: + chunk_raster: + op: chunk_raster + parameters: + step_y: "@from(step)" + step_x: "@from(step)" + list_to_sequence: + op: list_to_sequence + compute_onnx: + op: compute_onnx_from_chunks + op_dir: compute_onnx + parameters: + model_file: "@from(model_file)" + window_size: "@from(step)" + combine_chunks: + op: combine_chunks +edges: + - origin: chunk_raster.chunk_series + destination: + - compute_onnx.chunk + - origin: list_to_sequence.rasters_seq + destination: + - compute_onnx.input_raster + - origin: compute_onnx.output_raster + destination: + - combine_chunks.chunks +description: + short_description: Runs an Onnx model over all rasters in the input to produce a single raster. + long_description: >- + This workflow is intended to apply an Onnx model over all rasters in the input to produce + a single raster output. This can be used, for instance, to compute time-series analysis of + a list of rasters that span multiple times. The analysis can be any computation that can + be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). + In order to run the model in parallel (and avoid running out of memory if the list of rasters + is large), the input rasters are divided spatially into chunks (that span all times). The + Onnx model is applied to these chunks and then combined back to produce the final output. + sources: + rasters: Input rasters. + sinks: + raster: Result of the Onnx model run. 
+ parameters: + model_file: + An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. + step: + Size of the chunk in pixels. diff --git a/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml b/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml new file mode 100644 index 00000000..6aac04e7 --- /dev/null +++ b/workflows/data_processing/chunk_onnx/chunk_onnx_sequence.yaml @@ -0,0 +1,51 @@ +name: chunk_onnx_sequence +sources: + rasters: + - chunk_raster.rasters + - compute_onnx.input_raster +sinks: + raster: combine_chunks.raster +parameters: + model_file: + step: 100 +tasks: + chunk_raster: + op: chunk_sequence_raster + op_dir: chunk_raster + parameters: + step_y: "@from(step)" + step_x: "@from(step)" + compute_onnx: + op: compute_onnx_from_chunks + op_dir: compute_onnx + parameters: + model_file: "@from(model_file)" + window_size: "@from(step)" + combine_chunks: + op: combine_chunks +edges: + - origin: chunk_raster.chunk_series + destination: + - compute_onnx.chunk + - origin: compute_onnx.output_raster + destination: + - combine_chunks.chunks +description: + short_description: Runs an Onnx model over all rasters in the input to produce a single raster. + long_description: >- + This workflow is intended to run an Onnx model on all input rasters to produce + a single raster output. This can be used, for instance, to compute time-series analysis of + a list of rasters that span multiple times. The analysis can be any computation that can + be expressed as an Onnx model (for an example, see notebooks/crop_cycles/crop_cycles.ipynb). + In order to run the model in parallel (and avoid running out of memory if the list of rasters + is large), the input rasters are divided spatially into chunks (that span all times). The + Onnx model is applied to these chunks and then combined back to produce the final output. + sources: + rasters: Input rasters. + sinks: + raster: Result of the Onnx model run. 
+ parameters: + model_file: + An Onnx model which needs to be deployed with "farmvibes-ai local add-onnx" command. + step: + Size of the chunk in pixels. diff --git a/workflows/data_processing/clip/clip.yaml b/workflows/data_processing/clip/clip.yaml new file mode 100644 index 00000000..db0b467b --- /dev/null +++ b/workflows/data_processing/clip/clip.yaml @@ -0,0 +1,32 @@ +name: clip +sources: + raster: + - clip_raster.raster + input_geometry: + - clip_raster.input_item +sinks: + clipped_raster: clip_raster.clipped_raster +parameters: + hard_clip: false +tasks: + clip_raster: + op: clip_raster + parameters: + hard_clip: "@from(hard_clip)" +edges: +description: + short_description: Performs a clip on an input raster based on a provided reference geometry. + long_description: + The workflow outputs a new raster copied from the input raster with its geometry metadata as the + intersection between the input raster's geometry and the provided reference geometry. If the + parameter hard_clip is set to true, then only data in the intersection is kept in output. The + workflow raises an error if there is no intersection between both geometries. + sources: + raster: Input raster to be clipped. + input_geometry: Reference geometry. + sinks: + clipped_raster: Clipped raster with the reference geometry. + parameters: + hard_clip: > + if true, keeps only data inside the intersection of reference and input geometries, soft clip + otherwise diff --git a/workflows/data_processing/gradient/raster_gradient.yaml b/workflows/data_processing/gradient/raster_gradient.yaml new file mode 100644 index 00000000..62be413b --- /dev/null +++ b/workflows/data_processing/gradient/raster_gradient.yaml @@ -0,0 +1,18 @@ +name: raster_gradient +sources: + raster: + - gradient.input_raster +sinks: + gradient: gradient.output_raster +tasks: + gradient: + op: compute_raster_gradient +edges: +description: + short_description: Computes the gradient of each band of the input raster with a Sobel operator. 
+ long_description: + sources: + raster: Input raster. + sinks: + gradient: Raster with the gradients. + parameters: diff --git a/workflows/data_processing/heatmap/classification.yaml b/workflows/data_processing/heatmap/classification.yaml new file mode 100644 index 00000000..c2be97ca --- /dev/null +++ b/workflows/data_processing/heatmap/classification.yaml @@ -0,0 +1,108 @@ +name: heatmap_intermediate +sources: + input_raster: + - compute_index.raster + samples: + - soil_sample_heatmap.samples +sinks: + result: soil_sample_heatmap.result +parameters: + attribute_name: + buffer: + index: + bins: + simplify: + tolerance: + data_scale: + max_depth: + n_estimators: + random_state: +tasks: + compute_index: + workflow: data_processing/index/index + parameters: + index: "@from(index)" + soil_sample_heatmap: + op: soil_sample_heatmap_using_classification + op_dir: heatmap_sensor + parameters: + attribute_name: "@from(attribute_name)" + buffer: "@from(buffer)" + bins: "@from(bins)" + simplify: "@from(simplify)" + tolerance: "@from(tolerance)" + data_scale: "@from(data_scale)" + max_depth: "@from(max_depth)" + n_estimators: "@from(n_estimators)" + random_state: "@from(random_state)" +edges: + - origin: compute_index.index_raster + destination: + - soil_sample_heatmap.raster +description: + short_description: > + Utilizes input Sentinel-2 satellite imagery & the sensor samples as labeled data that contain + nutrient information (Nitrogen, Carbon, pH, Phosphorus) to train a model using Random Forest classifier. + The inference operation predicts nutrients in soil for the chosen farm boundary. + long_description: |- + The workflow generates a heatmap for the selected nutrient. It relies on sample soil data that + contain information of nutrients. The quantity of samples defines the accuracy of the heat map + generation. During the research, testing was performed with samples spaced at 200 feet, 100 feet and + 50 feet. 
The 50 feet sample spaced distance provided results matching the ground truth. + Generating heatmaps with this approach reduces the number of samples. It utilizes the logic + below behind the scenes to generate heatmap. + - Read the sentinel raster provided. + - Sensor samples need to be uploaded into prescriptions entity in Azure + data manager for Agriculture (ADMAg). ADMAg has a hierarchy to hold + information of Party, Field, Seasons, Crop etc. Prior to + uploading prescriptions, it is required to build hierarchy and + a `prescription_map_id`. All prescriptions uploaded to ADMAg are + related to farm hierarchy through `prescription_map_id`. Please refer to + https://learn.microsoft.com/en-us/rest/api/data-manager-for-agri/ for + more information on ADMAg. + - Compute indices using the spyndex python package. + - Clip the satellite imagery & sensor samples using farm boundary. + - Perform spatial interpolation to find raster pixels within the offset distance + from sample location and assign the value of nutrients to group of pixels. + - Classify the data based on number of bins. + - Train the model using Random Forest classifier. + - Predict the nutrients using the satellite imagery. + - Generate a shape file using the predicted outputs. + sources: + input_raster: Input raster for index computation. + samples: External references to sensor samples for nutrients. + sinks: + result: Zip file containing cluster geometries. + parameters: + attribute_name: + Nutrient property name in sensor samples geojson file. For example + CARBON (C), Nitrogen (N), Phosphorus (P) etc., + buffer: Offset distance from sample to perform interpolate operations with raster. + index: Type of index to be used to generate heatmap. 
For example - evi, pri etc., + bins: + Possible number of groups used to move value to nearest group using [numpy + histogram](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) + and to pre-process the data to support model training with classification. + simplify: + Replace small polygons in input with value of their largest neighbor + after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + tolerance: + All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) + will be no more than tolerance distance from the original. It has the same units + as the coordinate reference system of the GeoSeries. For example, using tolerance=100 + in a projected CRS with meters as units means a distance of 100 meters in reality. + data_scale: + Accepts True or False. Default is False. On True, it scales data using + [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) + from the scikit-learn package. It standardizes features by removing the mean and + scaling to unit variance. + max_depth: + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than min_samples_split + samples. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + n_estimators: The number of trees in the forest. For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) + random_state: + Controls both the randomness of the bootstrapping of the samples + used when building trees (if bootstrap=True) and the sampling of the features + to consider when looking for the best split at each node (if max_features < + n_features). 
For more details refer to (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) diff --git a/workflows/data_processing/index/index.yaml b/workflows/data_processing/index/index.yaml new file mode 100644 index 00000000..0b2d2b7f --- /dev/null +++ b/workflows/data_processing/index/index.yaml @@ -0,0 +1,28 @@ +name: index +sources: + raster: + - compute_index.raster +sinks: + index_raster: compute_index.index +parameters: + index: ndvi +tasks: + compute_index: + op: compute_index + parameters: + index: "@from(index)" +edges: +description: + short_description: Computes an index from the bands of an input raster. + long_description: >- + In addition to the indices 'ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', 'methane' and 'pri' + all indices in https://github.com/awesome-spectral-indices/awesome-spectral-indices are + available (depending on the bands available on the corresponding satellite product). + sources: + raster: Input raster. + sinks: + index_raster: Single-band raster with the computed index. + parameters: + index: + The choice of index to be computed ('ndvi', 'evi', 'msavi', 'ndre', 'reci', 'ndmi', + 'methane', 'pri' or any of the awesome-spectral-indices). 
diff --git a/workflows/data_processing/linear_trend/chunked_linear_trend.yaml b/workflows/data_processing/linear_trend/chunked_linear_trend.yaml new file mode 100644 index 00000000..42afd082 --- /dev/null +++ b/workflows/data_processing/linear_trend/chunked_linear_trend.yaml @@ -0,0 +1,43 @@ +name: chunked_linear_trend +sources: + input_rasters: + - chunk_raster.rasters + - linear_trend.rasters +sinks: + linear_trend_raster: combine_chunks.raster +parameters: + # steps used to divide the rasters into chunks + # units are grid points + chunk_step_y: + chunk_step_x: +tasks: + chunk_raster: + op: chunk_raster + parameters: + step_y: "@from(chunk_step_y)" + step_x: "@from(chunk_step_x)" + linear_trend: + op: linear_trend + combine_chunks: + op: combine_chunks +edges: + - origin: chunk_raster.chunk_series + destination: + - linear_trend.series + - origin: linear_trend.trend + destination: + - combine_chunks.chunks +description: + short_description: Computes the pixel-wise linear trend of a list of rasters (e.g. NDVI). + long_description: + The workflow computes the linear trend over chunks of data, combining them into the final + raster. + sources: + input_rasters: List of rasters to compute linear trend. + sinks: + linear_trend_raster: Raster with the trend and the test statistics. + parameters: + chunk_step_y: + steps used to divide the rasters into chunks in the y direction (units are grid points). + chunk_step_x: + steps used to divide the rasters into chunks in the x direction (units are grid points). 
diff --git a/workflows/data_processing/merge/match_merge_to_ref.yaml b/workflows/data_processing/merge/match_merge_to_ref.yaml new file mode 100644 index 00000000..9957539a --- /dev/null +++ b/workflows/data_processing/merge/match_merge_to_ref.yaml @@ -0,0 +1,57 @@ +name: match_merge_to_ref +sources: + rasters: + - pair.rasters2 + ref_rasters: + - pair.rasters1 + - group.group_by +sinks: + match_rasters: merge.raster +parameters: + resampling: bilinear +tasks: + pair: + op: pair_intersecting_rasters + match: + op: match_raster_to_ref + parameters: + resampling: "@from(resampling)" + group: + op: group_rasters_by_geometries + merge: + op: merge_rasters + parameters: + resampling: "@from(resampling)" +edges: + - origin: pair.paired_rasters1 + destination: + - match.ref_raster + - origin: pair.paired_rasters2 + destination: + - match.raster + - origin: match.output_raster + destination: + - group.rasters + - origin: group.raster_groups + destination: + - merge.raster_sequence +description: + short_description: Resamples input rasters to the reference rasters' grid. + long_description: + The workflow will produce input and reference raster pairs with intersecting geometries. For + each pair, the input raster is resampled to match the reference raster's grid. Afterwards, all + resampled rasters are grouped if they are contained in a reference raster geometry, and each + raster group is merged into a single raster. The output should contain the information available + in the input rasters, gridded according to the reference rasters. + sources: + rasters: Input rasters that will be resampled. + ref_rasters: Reference rasters. + sinks: + match_rasters: Rasters with information from the input rasters on the reference grid. + parameters: + resampling: >- + Type of resampling when reprojecting the rasters.
See + [link=https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling] + rasterio documentation: + https://rasterio.readthedocs.io/en/latest/api/rasterio.enums.html#rasterio.enums.Resampling[/] + for all available resampling options. diff --git a/workflows/data_processing/outlier/detect_outlier.yaml b/workflows/data_processing/outlier/detect_outlier.yaml new file mode 100644 index 00000000..ac7070dd --- /dev/null +++ b/workflows/data_processing/outlier/detect_outlier.yaml @@ -0,0 +1,36 @@ +name: detect_outlier +sources: + rasters: + - outlier.rasters +sinks: + segmentation: outlier.segmentation + heatmap: outlier.heatmap + outliers: outlier.outliers + mixture_means: outlier.mixture_means +parameters: + threshold: +tasks: + outlier: + op: detect_outliers + parameters: + threshold: "@from(threshold)" +edges: +description: + short_description: + Fits a single-component Gaussian Mixture Model (GMM) over input data to detect outliers + according to the threshold parameter. + long_description: + The workflow outputs segmentation and outlier maps based on the threshold parameter and the + likelihood of each sample belonging to the GMM component. It also yields heatmaps of the + likelihood, and the mean of GMM's component. + sources: + rasters: Input rasters. + sinks: + segmentation: + Segmentation maps based on the likelihood of each sample belonging to the GMM's + single-component. + heatmap: Likelihood maps. + outliers: Outlier maps based on the thresholded likelihood map. + mixture_means: Mean of the GMM. + parameters: + threshold: Likelihood threshold value to consider a sample as an outlier. 
diff --git a/workflows/data_processing/threshold/threshold_raster.yaml b/workflows/data_processing/threshold/threshold_raster.yaml new file mode 100644 index 00000000..ef47fcb5 --- /dev/null +++ b/workflows/data_processing/threshold/threshold_raster.yaml @@ -0,0 +1,23 @@ +name: threshold_raster +sources: + raster: + - threshold_task.raster +sinks: + thresholded_raster: threshold_task.thresholded +parameters: + threshold: +tasks: + threshold_task: + op: threshold_raster + parameters: + threshold: "@from(threshold)" +edges: +description: + short_description: Thresholds values of the input raster if higher than the threshold parameter. + long_description: + sources: + raster: Input raster. + sinks: + thresholded_raster: Thresholded raster. + parameters: + threshold: Threshold value. diff --git a/workflows/data_processing/timeseries/timeseries_aggregation.yaml b/workflows/data_processing/timeseries/timeseries_aggregation.yaml new file mode 100644 index 00000000..0048eecf --- /dev/null +++ b/workflows/data_processing/timeseries/timeseries_aggregation.yaml @@ -0,0 +1,27 @@ +name: timeseries_aggregation +sources: + raster: + - summary.raster + input_geometry: + - summary.input_geometry +sinks: + timeseries: timeseries.timeseries +tasks: + summary: + op: summarize_raster + timeseries: + op: aggregate_statistics_timeseries +edges: + - origin: summary.summary + destination: + - timeseries.stats +description: + short_description: + Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster + and aggregates them into a timeseries. + long_description: + sources: + raster: Input raster. + input_geometry: Geometry of interest. + sinks: + timeseries: Aggregated statistics of the raster. 
diff --git a/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml b/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml new file mode 100644 index 00000000..ff55bbb4 --- /dev/null +++ b/workflows/data_processing/timeseries/timeseries_masked_aggregation.yaml @@ -0,0 +1,40 @@ +name: timeseries_masked_aggregation +sources: + raster: + - masked_summary.raster + mask: + - masked_summary.mask + input_geometry: + - masked_summary.input_geometry +sinks: + timeseries: timeseries.timeseries +parameters: + timeseries_masked_thr: +tasks: + masked_summary: + op: summarize_masked_raster + op_dir: summarize_raster + timeseries: + op: aggregate_statistics_timeseries + parameters: + masked_thr: "@from(timeseries_masked_thr)" +edges: + - origin: masked_summary.summary + destination: + - timeseries.stats +description: + short_description: + Computes the mean, standard deviation, maximum, and minimum values of all regions of the raster + considered by the mask and aggregates them into a timeseries. + long_description: + sources: + raster: Input raster. + mask: Mask of the regions to be considered during summarization. + input_geometry: Geometry of interest. + sinks: + timeseries: Aggregated statistics of the raster considered by the mask. + parameters: + timeseries_masked_thr: + Threshold of the maximum ratio of masked content allowed in a raster. The statistics of + rasters with masked content above the threshold (e.g., heavily clouded) are not included in + the timeseries.
diff --git a/workflows/farm_ai/agriculture/canopy_cover.yaml b/workflows/farm_ai/agriculture/canopy_cover.yaml new file mode 100644 index 00000000..71b1f9c6 --- /dev/null +++ b/workflows/farm_ai/agriculture/canopy_cover.yaml @@ -0,0 +1,50 @@ +name: canopy_cover +sources: + user_input: + - ndvi_summary.user_input + - canopy_summary_timeseries.input_geometry +sinks: + ndvi: ndvi_summary.compute_ndvi.compute_index.index + estimated_canopy_cover: canopy.estimated_canopy_cover + ndvi_timeseries: ndvi_summary.timeseries + canopy_timeseries: canopy_summary_timeseries.timeseries +parameters: + pc_key: +tasks: + ndvi_summary: + workflow: farm_ai/agriculture/ndvi_summary + parameters: + pc_key: "@from(pc_key)" + canopy: + op: estimate_canopy_cover + canopy_summary_timeseries: + workflow: data_processing/timeseries/timeseries_masked_aggregation +edges: + - origin: ndvi_summary.compute_ndvi.compute_index.index + destination: + - canopy.indices + - origin: canopy.estimated_canopy_cover + destination: + - canopy_summary_timeseries.raster + # We need to use the full name if this is not a sink 😭 + - origin: ndvi_summary.s2.cloud.merge.merged_cloud_mask + destination: + - canopy_summary_timeseries.mask +description: + short_description: Estimates pixel-wise canopy cover for a region and date. + long_description: + The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, and + computes the NDVI for each available tile and date. It applies a linear regressor trained with + polynomial features (up to the 3rd degree) on top of the index raster to estimate canopy cover. + The coefficients and intercept of the regressor were obtained beforehand using as ground-truth + masked/annotated drone imagery, and are used for inference in this workflow. + sources: + user_input: Time range and geometry of interest. + sinks: + ndvi: NDVI raster.
+ estimated_canopy_cover: Raster with pixel-wise canopy cover estimation; + ndvi_timeseries: + Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. + canopy_timeseries: Aggregated canopy cover statistics. + parameters: + pc_key: Optional Planetary Computer API key. diff --git a/workflows/farm_ai/agriculture/change_detection.yaml b/workflows/farm_ai/agriculture/change_detection.yaml new file mode 100644 index 00000000..89491b94 --- /dev/null +++ b/workflows/farm_ai/agriculture/change_detection.yaml @@ -0,0 +1,57 @@ +name: change_detection +sources: + user_input: + - spaceeye.user_input + - summary_timeseries.input_geometry +sinks: + spaceeye_raster: spaceeye.raster + index: ndvi.index_raster + timeseries: summary_timeseries.timeseries + segmentation: outliers.segmentation + heatmap: outliers.heatmap + outliers: outliers.outliers + mixture_means: outliers.mixture_means +parameters: + pc_key: +tasks: + spaceeye: + workflow: data_ingestion/spaceeye/spaceeye + parameters: + pc_key: "@from(pc_key)" + ndvi: + workflow: data_processing/index/index + parameters: + index: ndvi + summary_timeseries: + workflow: data_processing/timeseries/timeseries_aggregation + outliers: + workflow: data_processing/outlier/detect_outlier +edges: + - origin: spaceeye.raster + destination: + - ndvi.raster + - origin: ndvi.index_raster + destination: + - summary_timeseries.raster + - outliers.rasters +description: + short_description: Identifies changes/outliers over NDVI across dates. + long_description: + The workflow generates SpaceEye imagery for the input region and time range and computes NDVI + raster for each date. It aggregates NDVI statistics (mean, standard deviation, maximum and + minimum) in time and detects outliers across dates with a single-component Gaussian Mixture + Model (GMM). + sources: + user_input: Time range and geometry of interest. + sinks: + spaceeye_raster: SpaceEye cloud-free rasters. + index: NDVI rasters. 
+ timeseries: Aggregated NDVI statistics over the time range. + segmentation: + Segmentation maps based on the likelihood of each sample belonging to the GMM's + single-component. + heatmap: Likelihood maps. + outliers: Outlier maps. + mixture_means: Means of the GMM. + parameters: + pc_key: PlanetaryComputer API key. diff --git a/workflows/farm_ai/agriculture/emergence_summary.yaml b/workflows/farm_ai/agriculture/emergence_summary.yaml new file mode 100644 index 00000000..1356019b --- /dev/null +++ b/workflows/farm_ai/agriculture/emergence_summary.yaml @@ -0,0 +1,57 @@ +name: emergence_summary +sources: + user_input: + - s2.user_input + - summary_timeseries.input_geometry +sinks: + timeseries: summary_timeseries.timeseries +parameters: + pc_key: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks + parameters: + max_tiles_per_time: 1 + pc_key: "@from(pc_key)" + msavi: + workflow: data_processing/index/index + parameters: + index: msavi + emergence: + workflow: data_processing/threshold/threshold_raster + parameters: + threshold: 0.2 + summary_timeseries: + workflow: data_processing/timeseries/timeseries_masked_aggregation +edges: + - origin: s2.raster + destination: + - msavi.raster + - origin: msavi.index_raster + destination: + - emergence.raster + - origin: emergence.thresholded_raster + destination: + - summary_timeseries.raster + - origin: s2.mask + destination: + - summary_timeseries.mask +description: + short_description: + Calculates emergence statistics using thresholded MSAVI (mean, standard deviation, maximum and + minimum) for the input geometry and time range. + long_description: + The workflow retrieves Sentinel2 products with Planetary Computer (PC) API, forwards them to a + cloud detection model and combines the predicted cloud mask to the mask provided by PC. 
It + computes the MSAVI for each available tile and date, thresholds them above a certain value and + summarizes each with the mean, standard deviation, maximum and minimum values for the regions + not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available + dates, filtering out heavily-clouded tiles. + sources: + user_input: Time range and geometry of interest. + sinks: + timeseries: + Aggregated emergence statistics of the retrieved tiles within the input geometry and time + range. + parameters: + pc_key: Optional Planetary Computer API key. diff --git a/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml b/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml new file mode 100644 index 00000000..50a054d6 --- /dev/null +++ b/workflows/farm_ai/agriculture/green_house_gas_fluxes.yaml @@ -0,0 +1,28 @@ +name: green_house_gas_fluxes +sources: + user_input: + - ghg.ghg +sinks: + fluxes: ghg.fluxes +parameters: + crop_type: "corn" +tasks: + ghg: + op: compute_ghg_fluxes + parameters: + crop_type: "@from(crop_type)" +edges: +description: + short_description: Computes Green House Gas Fluxes for a region and date range. + long_description: >- + The workflow follows the GHG Protocol guidelines published for Brazil + (which are based on IPCC reports) to compute Green House Gas emission + fluxes (sequestration versus emissions) for a given crop. + sources: + user_input: The user-provided inputs for GHG computation. + sinks: + fluxes: The computed fluxes for the given area and date range considering the user input data. + parameters: + crop_type: >- + The type of the crop to compute GHG emissions. + Supported crops are 'wheat', 'corn', 'cotton', and 'soybeans'.
diff --git a/workflows/farm_ai/agriculture/heatmap_using_classification.yaml b/workflows/farm_ai/agriculture/heatmap_using_classification.yaml new file mode 100644 index 00000000..0c7ca7f7 --- /dev/null +++ b/workflows/farm_ai/agriculture/heatmap_using_classification.yaml @@ -0,0 +1,49 @@ +name: heatmap_using_classification +sources: + input_samples: + - download_samples.user_input + input_raster: + - soil_sample_heatmap_classification.input_raster +sinks: + result: soil_sample_heatmap_classification.result +parameters: + attribute_name: + buffer: + index: + bins: + simplify: + tolerance: + data_scale: + max_depth: + n_estimators: + random_state: +tasks: + download_samples: + workflow: data_ingestion/user_data/ingest_geometry + soil_sample_heatmap_classification: + workflow: data_processing/heatmap/classification + parameters: + attribute_name: "@from(attribute_name)" + buffer: "@from(buffer)" + index: "@from(index)" + bins: "@from(bins)" + simplify: "@from(simplify)" + tolerance: "@from(tolerance)" + data_scale: "@from(data_scale)" + max_depth: "@from(max_depth)" + n_estimators: "@from(n_estimators)" + random_state: "@from(random_state)" +edges: + - origin: download_samples.geometry + destination: + - soil_sample_heatmap_classification.samples +description: + short_description: The workflow generates a nutrient heatmap for samples provided by user by downloading the samples from user input. + long_description: The samples provided are related with farm boundary and have required nutrient information to create a heatmap. + sources: + input_raster: Input raster for index computation. + input_samples: External references to sensor samples for nutrients. + sinks: + result: Zip file containing cluster geometries. 
+ parameters: # Utilize parameters configured in workflow data_processing/heatmap/classification.yaml + diff --git a/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml b/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml new file mode 100644 index 00000000..a6ede80d --- /dev/null +++ b/workflows/farm_ai/agriculture/heatmap_using_classification_admag.yaml @@ -0,0 +1,71 @@ +name: heatmap_using_classification_admag +sources: + admag_input: + - prescriptions.admag_input + input_raster: + - soil_sample_heatmap_classification.input_raster +sinks: + result: soil_sample_heatmap_classification.result +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: + attribute_name: + buffer: + index: + bins: + simplify: + tolerance: + data_scale: + max_depth: + n_estimators: + random_state: +tasks: + prescriptions: + workflow: data_ingestion/admag/prescriptions + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" + soil_sample_heatmap_classification: + workflow: data_processing/heatmap/classification + parameters: + attribute_name: "@from(attribute_name)" + buffer: "@from(buffer)" + index: "@from(index)" + bins: "@from(bins)" + simplify: "@from(simplify)" + tolerance: "@from(tolerance)" + data_scale: "@from(data_scale)" + max_depth: "@from(max_depth)" + n_estimators: "@from(n_estimators)" + random_state: "@from(random_state)" +edges: + - origin: prescriptions.response + destination: + - soil_sample_heatmap_classification.samples +description: + short_description: This workflow integrates the ADMAG API to download prescriptions and generate a heatmap. + long_description: The prescriptions are related with farm boundary and the nutrient information. Each prescription represents a sensor sample at a location within a farm boundary.
+ + sources: + input_raster: Input raster for index computation. + admag_input: Required inputs to download prescriptions from admag. + sinks: + result: Zip file containing cluster geometries. + parameters: + base_url: URL to access the registered app + client_id: + Value that uniquely identifies the registered application in the Microsoft identity platform. Visit url + https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app to register the app. + client_secret: + Sometimes called an application password, a client secret is a string value your app can use in place of a certificate + to identify itself. + authority: + The endpoint URIs for your app are generated automatically when you register or configure your app. It is used by + client to obtain authorization from the resource owner + default_scope: URL for default azure OAuth2 permissions diff --git a/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml b/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml new file mode 100644 index 00000000..630a00cf --- /dev/null +++ b/workflows/farm_ai/agriculture/heatmap_using_neighboring_data_points.yaml @@ -0,0 +1,68 @@ +name: heatmap_using_neighboring_data_points +sources: + input_raster: + - soil_sample_heatmap.raster + input_samples: + - download_samples.user_input + input_sample_clusters: + - download_sample_clusters.user_input +sinks: + result: soil_sample_heatmap.result +parameters: + attribute_name: + simplify: + tolerance: + algorithm: + resolution: + bins: +tasks: + download_samples: + workflow: data_ingestion/user_data/ingest_geometry + download_sample_clusters: + workflow: data_ingestion/user_data/ingest_geometry + soil_sample_heatmap: + op: soil_sample_heatmap_using_neighbors + op_dir: heatmap_sensor + parameters: + attribute_name: "@from(attribute_name)" + simplify: "@from(simplify)" + tolerance: "@from(tolerance)" + algorithm: "@from(algorithm)" + resolution: "@from(resolution)" + bins:
"@from(bins)" +edges: + - origin: download_samples.geometry + destination: + - soil_sample_heatmap.samples + - origin: download_sample_clusters.geometry + destination: + - soil_sample_heatmap.samples_boundary +description: + short_description: + Creates heatmap using the neighbors by performing spatial interpolation operations. It utilizes soil information collected + at optimal sensor/sample locations and downloaded sentinel satellite imagery. + long_description: + The optimal location of nutrient samples are identified using workflow . + The quantity of samples defines the accuracy of the heatmap generation. During the research performed testing on a 100 acre farm using sample count + of approximately 20, 80, 130, 600. The research concluded that a sample count of 20 provided decent results, also accuracy of nutrient + information improved with increase in sample count. + sources: + input_raster: Sentinel-2 raster. + input_samples: Sensor samples with nutrient information. + input_sample_clusters: Clusters boundaries of sensor samples locations. + sinks: + result: Zip file containing heatmap output as shape files. + parameters: + attribute_name: "Nutrient property name in sensor samples geojson file. For example: CARBON (C), Nitrogen (N), Phosphorus (P) etc.," + simplify: Replace small polygons in input with value of their largest neighbor after converting from raster to vector. Accepts 'simplify' or 'convex' or 'none'. + tolerance: + All parts of a [simplified geometry](https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html) will be no more + than tolerance distance from the original. It has the same units as the coordinate reference system of the GeoSeries. For example, using + tolerance=100 in a projected CRS with meters as units means a distance of 100 meters in reality. + algorithm: Algorithm used to identify nearest neighbors. Accepts 'cluster overlap' or 'nearest neighbor' or 'kriging neighbor'. 
+ resolution: + Defines the output resolution as the ratio of input raster resolution. For example, if resolution is 5, the output + heatmap is 5 times coarser than input raster. + bins: + Defines the number of equal-width bins in the given range. Refer to this article to learn more about bins + https://numpy.org/doc/stable/reference/generated/numpy.histogram.html diff --git a/workflows/farm_ai/agriculture/methane_index.yaml b/workflows/farm_ai/agriculture/methane_index.yaml new file mode 100644 index 00000000..afcf2ab7 --- /dev/null +++ b/workflows/farm_ai/agriculture/methane_index.yaml @@ -0,0 +1,45 @@ +name: methane_index +sources: + user_input: + - s2.user_input + - clip.input_geometry +sinks: + index: methane.index_raster + s2_raster: s2.raster + cloud_mask: s2.mask +parameters: + pc_key: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks + parameters: + pc_key: "@from(pc_key)" + clip: + workflow: data_processing/clip/clip + methane: + workflow: data_processing/index/index + parameters: + index: methane +edges: + - origin: s2.raster + destination: + - clip.raster + - origin: clip.clipped_raster + destination: + - methane.raster +description: + short_description: Computes methane index from ultra emitters for a region and date range. + long_description: + The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API and + crops the rasters for the region defined in user_input. All bands are normalized and an + anti-aliasing Gaussian filter is applied to smooth and remove potential artifacts. An + unsupervised K-Nearest Neighbor is applied to identify bands similar to band 12, and the index + is computed by the difference between band 12 and the pixel-wise median of top K similar bands. + sources: + user_input: Time range and geometry of interest. + sinks: + index: Methane index raster. + s2_raster: Sentinel-2 raster. + cloud_mask: Cloud mask. + parameters: + pc_key: Optional Planetary Computer API key.
diff --git a/workflows/farm_ai/agriculture/ndvi_summary.yaml b/workflows/farm_ai/agriculture/ndvi_summary.yaml new file mode 100644 index 00000000..ad85b65a --- /dev/null +++ b/workflows/farm_ai/agriculture/ndvi_summary.yaml @@ -0,0 +1,47 @@ +name: ndvi_summary +sources: + user_input: + - s2.user_input + - summary_timeseries.input_geometry +sinks: + timeseries: summary_timeseries.timeseries +parameters: + pc_key: +tasks: + s2: + workflow: data_ingestion/sentinel2/preprocess_s2_improved_masks + parameters: + max_tiles_per_time: 1 + pc_key: "@from(pc_key)" + compute_ndvi: + workflow: data_processing/index/index + summary_timeseries: + workflow: data_processing/timeseries/timeseries_masked_aggregation +edges: + - origin: s2.raster + destination: + - compute_ndvi.raster + - origin: compute_ndvi.index_raster + destination: + - summary_timeseries.raster + - origin: s2.mask + destination: + - summary_timeseries.mask +description: + short_description: + Calculates NDVI statistics (mean, standard deviation, maximum and minimum) for the input + geometry and time range. + long_description: + The workflow retrieves the relevant Sentinel-2 products with Planetary Computer (PC) API, + forwards them to a cloud detection model and combines the predicted cloud mask to the mask + obtained from the product. The workflow computes the NDVI for each available tile and date, + summarizing each with the mean, standard deviation, maximum and minimum values for the regions + not obscured by clouds. Finally, it outputs a timeseries with such statistics for all available + dates, ignoring heavily-clouded tiles. + sources: + user_input: Time range and geometry of interest. + sinks: + timeseries: + Aggregated NDVI statistics of the retrieved tiles within the input geometry and time range. + parameters: + pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/agriculture/weed_detection.yaml b/workflows/farm_ai/agriculture/weed_detection.yaml new file mode 100644 index 00000000..72a9a947 --- /dev/null +++ b/workflows/farm_ai/agriculture/weed_detection.yaml @@ -0,0 +1,67 @@ +name: weed_detection +sources: + user_input: + - download_raster.user_input +sinks: + result: weed_detection.result +parameters: + buffer: + no_data: + clusters: + sieve_size: + simplify: + tolerance: + samples: + bands: + alpha_index: +tasks: + download_raster: + workflow: data_ingestion/user_data/ingest_raster + weed_detection: + op: weed_detection + parameters: + buffer: "@from(buffer)" + no_data: "@from(no_data)" + clusters: "@from(clusters)" + sieve_size: "@from(sieve_size)" + simplify: "@from(simplify)" + tolerance: "@from(tolerance)" + samples: "@from(samples)" + bands: "@from(bands)" + alpha_index: "@from(alpha_index)" +edges: + - origin: download_raster.raster + destination: + - weed_detection.raster +description: + short_description: Generates shape files for similarly colored regions in the input raster. + long_description: + The workflow retrieves a remote raster and trains a Gaussian Mixture Model (GMM) over a subset + of the input data with a fixed number of components. The GMM is then used to cluster all images + pixels. Clustered regions are converted to polygons with a minimum size threshold. These + polygons are then simplified to smooth their borders. All polygons of a given cluster are + written to a single shapefile. All files are then compressed and returned as a single zip + archive. + sources: + user_input: External references to raster data. + sinks: + result: Zip file containing cluster geometries. + parameters: + buffer: + Buffer size, in projected CRS, to apply to the input geometry before sampling training points. + A negative number can be used to avoid sampling unwanted regions if the geometry is not very + precise. + no_data: Value to use as nodata when reading the raster. 
Uses the raster's internal nodata value + if not provided. + clusters: Number of clusters to use when segmenting the image. + sieve_size: + Area of the minimum connected region. Smaller regions will have their class assigned to the + largest adjacent region. + simplify: + Method used to simplify the geometries. Accepts 'none', for no simplification, 'simplify', for + tolerance-based simplification, and 'convex', for returning the convex hull. + tolerance: + Tolerance for the simplification algorithm. Only applicable if simplification method is 'simplify'. + samples: Number of samples to use during training. + bands: List of band indices to use during training and inference. + alpha_index: Positive index of alpha band, if used to filter out nodata values. diff --git a/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml b/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml new file mode 100644 index 00000000..9ba80151 --- /dev/null +++ b/workflows/farm_ai/carbon_local/admag_carbon_integration.yaml @@ -0,0 +1,94 @@ +name: admag_carbon_integration +sources: + baseline_admag_input: + - baseline_seasonal_field_list.admag_input + scenario_admag_input: + - scenario_seasonal_field_list.admag_input +sinks: + carbon_output: admag_carbon.carbon_output +parameters: + base_url: + client_id: + client_secret: + authority: + default_scope: + comet_support_email: + ngrok_token: +tasks: + baseline_seasonal_field_list: + workflow: data_ingestion/admag/admag_seasonal_field + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" + scenario_seasonal_field_list: + workflow: data_ingestion/admag/admag_seasonal_field + parameters: + base_url: "@from(base_url)" + client_id: "@from(client_id)" + client_secret: "@from(client_secret)" + authority: "@from(authority)" + default_scope: "@from(default_scope)" + admag_carbon: + workflow:
farm_ai/carbon_local/carbon_whatif + parameters: + comet_support_email: "@from(comet_support_email)" + ngrok_token: "@from(ngrok_token)" +edges: + - origin: baseline_seasonal_field_list.seasonal_field + destination: + - admag_carbon.baseline_seasonal_fields + - origin: scenario_seasonal_field_list.seasonal_field + destination: + - admag_carbon.scenario_seasonal_fields +description: + short_description: + Computes the offset amount of carbon that would be sequestered in a seasonal + field using Microsoft Azure Data Manager for Agriculture (ADMAg) data. + long_description: + Derives carbon sequestration information. Microsoft Azure Data Manager for + Agriculture (ADMAg) and the COMET-Farm API are used to obtain + farming data and evaluate carbon offset. ADMAg is + capable of describing important farming activities such as fertilization, + tillage, and organic amendments applications, all of which are represented in + the data manager. FarmVibes.AI retrieves this information from the data manager + and builds SeasonalFieldInformation FarmVibes.AI objects. These objects are then + used to call the COMET-Farm API and evaluate Carbon Offset Information. + sources: + baseline_admag_input: + List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation + objects for baseline COMET-Farm API Carbon offset evaluation. + scenario_admag_input: + List of ADMAgSeasonalFieldInput to retrieve SeasonalFieldInformation + objects for scenarios COMET-Farm API Carbon offset evaluation. + sinks: + carbon_output: Carbon sequestration received for scenario information provided as input. + parameters: + comet_support_email: + Comet support email. The email used to register for a COMET account. The + requests are forwarded to comet with this email reference. This email is + used by comet to share the information back to you for failed requests. + ngrok_token: + NGROK session token. 
A token that FarmVibes uses to create a web_hook url + that is shared with Comet in a request when running the workflow. Comet + can use this link to send back a response to FarmVibes. NGROK is a + service that creates temporary urls for local servers. To use NGROK, + FarmVibes needs to get a token from this website, + https://dashboard.ngrok.com/. + base_url: + Azure Data Manager for Agriculture host. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + client_id: + Azure Data Manager for Agriculture client id. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + client_secret: + Azure Data Manager for Agriculture client secret. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + authority: + Azure Data Manager for Agriculture authority. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. + default_scope: + Azure Data Manager for Agriculture default scope. Please visit https://aka.ms/farmvibesDMA to check how + to get these credentials. 
\ No newline at end of file diff --git a/workflows/farm_ai/carbon_local/carbon_whatif.yaml b/workflows/farm_ai/carbon_local/carbon_whatif.yaml new file mode 100644 index 00000000..3a8fe012 --- /dev/null +++ b/workflows/farm_ai/carbon_local/carbon_whatif.yaml @@ -0,0 +1,54 @@ +name: carbon_whatif +sources: + baseline_seasonal_fields: + - comet_task.baseline_seasonal_fields + scenario_seasonal_fields: + - comet_task.scenario_seasonal_fields +sinks: + carbon_output: comet_task.carbon_output +parameters: + comet_support_email: + ngrok_token: +tasks: + comet_task: + op: whatif_comet_local_op + op_dir: carbon_local + parameters: + comet_support_email: "@from(comet_support_email)" + ngrok_token: "@from(ngrok_token)" +description: + short_description: + Computes the offset amount of carbon that would be sequestered in a seasonal field using the + baseline (historical) and scenario (time range interested in) information. + long_description: + To derive the amount of carbon, it relies on seasonal information provided for both baseline and + scenario. The baseline represents historical information of farm practices used during each season that + includes fertilizers, tillage, harvest and organic amendment. A minimum of 2 years of baseline information is required + to execute the workflow. The scenario represents future farm practices planned for each season that + includes fertilizers, tillage, harvest and organic amendment. For the scenario information provided, the workflow + computes the offset amount of carbon that would be sequestered in a seasonal field. A minimum of 2 years of baseline + information is required to execute the workflow. The requests received by the workflow are forwarded to the comet api. + To know more about comet refer to https://gitlab.com/comet-api/api-docs/-/tree/master/. 
+ To understand the enumerations and information accepted by comet refer to + https://gitlab.com/comet-api/api-docs/-/blob/master/COMET-Farm_API_File_Specification.xlsx + The submitted request gets executed within 5 minutes to a maximum of 2 hours. If a response is not received from comet within + this time period, check comet_support_email for information on failed requests. If no emails were received, check the + status of requests by contacting the comet support email address "appnrel@colostate.edu". + For public use comet limits requests to 50 each day. If more requests need to be sent, contact the support email address. + sources: + baseline_seasonal_fields: + List of seasonal fields that hold the historical information of farm practices such as fertilizers, + tillage, harvest and organic amendment. + scenario_seasonal_fields: + List of seasonal fields that hold the future information of farm practices such as fertilizers, + tillage, harvest and organic amendment. + sinks: + carbon_output: Carbon sequestration received for scenario information provided as input. + parameters: + comet_support_email: + COMET-Farm API Registered email. The requests are forwarded to comet with this email reference. + This email is used by comet to share the information back to you for failed requests. + ngrok_token: + NGROK session token. FarmVibes generates a web_hook url and shares it with comet along with the request to receive the + response from comet. It's a publicly accessible url and it's unique for each session. The url gets destroyed + once the session ends. 
To start the ngrok session, a token is required; it can be generated at https://dashboard.ngrok.com/ diff --git a/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml b/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml new file mode 100644 index 00000000..f3b4b92c --- /dev/null +++ b/workflows/farm_ai/land_cover_mapping/conservation_practices.yaml @@ -0,0 +1,92 @@ +name: conservation_practices +sources: + user_input: + - naip.user_input +sinks: + dem_raster: dem.raster + naip_raster: naip.raster + dem_gradient: gradient.gradient + cluster: cluster.output_raster + average_elevation: avg_elev.output_raster + practices: practice.output_raster +parameters: + clustering_iterations: + pc_key: +tasks: + naip: + workflow: data_ingestion/naip/download_naip + parameters: + pc_key: "@from(pc_key)" + cluster: + op: compute_raster_cluster + parameters: + number_iterations: "@from(clustering_iterations)" + dem: + workflow: data_ingestion/dem/download_dem + parameters: + pc_key: "@from(pc_key)" + gradient: + workflow: data_processing/gradient/raster_gradient + match_grad: + workflow: data_processing/merge/match_merge_to_ref + match_elev: + workflow: data_processing/merge/match_merge_to_ref + avg_elev: + op: compute_raster_class_windowed_average + practice: + op: compute_conservation_practice +edges: + - origin: naip.raster + destination: + - dem.user_input + - cluster.input_raster + - match_elev.ref_rasters + - match_grad.ref_rasters + - origin: dem.raster + destination: + - gradient.raster + - match_elev.rasters + - origin: gradient.gradient + destination: + - match_grad.rasters + - origin: cluster.output_raster + destination: + - avg_elev.input_cluster_raster + - origin: match_elev.match_rasters + destination: + - avg_elev.input_dem_raster # This is not a DemRaster anymore! 
+ - origin: avg_elev.output_raster + destination: + - practice.average_elevation + - origin: match_grad.match_rasters + destination: + - practice.elevation_gradient +description: + short_description: + Identifies conservation practices (terraces and grassed waterways) using elevation data. + long_description: + The workflow classifies pixels in terraces or grassed waterways. It starts by downloading NAIP and + USGS 3DEP tiles. Then, it computes the elevation gradient using a Sobel filter. And it computes + local clusters using an overlap clustering method. Then, it combines cluster and elevation tiles + to compute the average elevation per cluster. Finally, it uses a CNN model to classify pixels in + either terraces or grassed waterways. + sources: + user_input: Time range and geometry of interest. + sinks: + dem_raster: USGS 3DEP tiles that overlap the NAIP tiles that overlap the area of interest. + naip_raster: NAIP tiles that overlap the area of interest. + dem_gradient: + A copy of the USGS 3DEP tiles where the pixel values are the gradient computed using the Sobel + filter. + cluster: + A copy of the NAIP tiles with one band representing the output of the overlap clustering + method. Each pixel has a value between one and four. + average_elevation: + A combination of the dem_raster and cluster sinks, where each pixel value is the average + elevation of all pixels that fall in the same cluster. + practices: + A copy of the NAIP tile with one band where each pixel value refers to a conservation practice + (0 = none, 1 = terraces, 2 = grassed waterways). + parameters: + clustering_iterations: The number of iterations used in the overlap clustering method. + pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml b/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml new file mode 100644 index 00000000..a7d6788f --- /dev/null +++ b/workflows/farm_ai/land_degradation/landsat_ndvi_trend.yaml @@ -0,0 +1,34 @@ +name: landsat_ndvi_trend +sources: + user_input: + - landsat.user_input +sinks: + ndvi: trend.ndvi_raster + linear_trend: trend.linear_trend +parameters: + pc_key: +tasks: + landsat: + workflow: data_ingestion/landsat/preprocess_landsat + parameters: + pc_key: "@from(pc_key)" + trend: + workflow: farm_ai/land_degradation/ndvi_linear_trend +edges: + - origin: landsat.raster + destination: + - trend.raster +description: + short_description: + Estimates a linear trend over NDVI computed over LANDSAT tiles that intersect with the input + geometry and time range. + long_description: + The workflow downloads LANDSAT data, computes NDVI over them, and estimates a linear trend over + chunks of data, combining them into a final trend raster. + sources: + user_input: Time range and geometry of interest. + sinks: + ndvi: NDVI rasters. + linear_trend: Raster with the trend and the test statistics. + parameters: + pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml b/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml new file mode 100755 index 00000000..3d418b44 --- /dev/null +++ b/workflows/farm_ai/land_degradation/ndvi_linear_trend.yaml @@ -0,0 +1,31 @@ +name: ndvi_linear_trend +sources: + raster: + - ndvi.raster +sinks: + ndvi_raster: ndvi.index_raster + linear_trend: chunked_linear_trend.linear_trend_raster +tasks: + ndvi: + workflow: data_processing/index/index + parameters: + index: ndvi + chunked_linear_trend: + workflow: data_processing/linear_trend/chunked_linear_trend + parameters: + chunk_step_y: 512 + chunk_step_x: 512 +edges: + - origin: ndvi.index_raster + destination: + - chunked_linear_trend.input_rasters +description: + short_description: Computes the pixel-wise NDVI linear trend over the input raster. + long_description: + The workflow computes the NDVI from the input raster, calculates the linear trend over chunks of + data, combining them into the final raster. + sources: + raster: Input raster. + sinks: + ndvi_raster: NDVI raster. + linear_trend: Raster with the trend and the test statistics. 
diff --git a/workflows/farm_ai/segmentation/auto_segment_basemap.yaml b/workflows/farm_ai/segmentation/auto_segment_basemap.yaml new file mode 100644 index 00000000..e9b6fe9e --- /dev/null +++ b/workflows/farm_ai/segmentation/auto_segment_basemap.yaml @@ -0,0 +1,77 @@ +name: auto_segment_basemap +sources: + user_input: + - basemap_download.input_geometry + - basemap_automatic_segmentation.input_geometry +sinks: + basemap: basemap_download.merged_basemap + segmentation_mask: basemap_automatic_segmentation.segmentation_mask +parameters: + bingmaps_api_key: + basemap_zoom_level: 14 + model_type: vit_b + spatial_overlap: 0.5 + points_per_side: 16 + n_crop_layers: 0 + crop_overlap_ratio: 0.0 + crop_n_points_downscale_factor: 1 + pred_iou_thresh: 0.88 + stability_score_thresh: 0.95 + stability_score_offset: 1.0 + points_per_batch: 16 + num_workers: 0 + in_memory: True + chip_nms_thr: 0.7 + mask_nms_thr: 0.5 +tasks: + basemap_download: + workflow: data_ingestion/bing/basemap_download_merge + parameters: + api_key: "@from(bingmaps_api_key)" + zoom_level: "@from(basemap_zoom_level)" + basemap_automatic_segmentation: + workflow: ml/segment_anything/automatic_segmentation + parameters: + model_type: "@from(model_type)" + band_names: ["red", "green", "blue"] + band_scaling: null + band_offset: null + spatial_overlap: "@from(spatial_overlap)" + points_per_side: "@from(points_per_side)" + n_crop_layers: "@from(n_crop_layers)" + crop_overlap_ratio: "@from(crop_overlap_ratio)" + crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" + pred_iou_thresh: "@from(pred_iou_thresh)" + stability_score_thresh: "@from(stability_score_thresh)" + stability_score_offset: "@from(stability_score_offset)" + points_per_batch: "@from(points_per_batch)" + num_workers: "@from(num_workers)" + in_memory: "@from(in_memory)" + chip_nms_thr: "@from(chip_nms_thr)" + mask_nms_thr: "@from(mask_nms_thr)" +edges: + - origin: basemap_download.merged_basemap + destination: + - 
basemap_automatic_segmentation.input_raster +description: + short_description: >- + Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) automatic segmentation over + them. + long_description: >- + The workflow lists and downloads basemaps tiles with BingMaps API, and merges them into a + single raster. The raster is then split into chips of 1024x1024 pixels with an overlap defined + by `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid is defined + within each chip, with each point being used as a prompt for the segmentation. Each point is + used to generate a mask, and the masks are combined using multiple non-maximal suppression + steps to generate the final segmentation mask. Before running the workflow, make sure the model + has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. + The script will download the desired model weights from SAM repository, export the image encoder + and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + user_input: Time range and geometry of interest. + sinks: + basemap: Merged basemap used as input to the segmentation. + segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/segmentation/auto_segment_s2.yaml b/workflows/farm_ai/segmentation/auto_segment_s2.yaml new file mode 100644 index 00000000..2e8a7f25 --- /dev/null +++ b/workflows/farm_ai/segmentation/auto_segment_s2.yaml @@ -0,0 +1,75 @@ +name: auto_segment_s2 +sources: + user_input: + - preprocess_s2.user_input + - s2_automatic_segmentation.input_geometry +sinks: + s2_raster: preprocess_s2.raster + segmentation_mask: s2_automatic_segmentation.segmentation_mask +parameters: + pc_key: + model_type: vit_b + spatial_overlap: 0.5 + points_per_side: 16 + n_crop_layers: 0 + crop_overlap_ratio: 0.0 + crop_n_points_downscale_factor: 1 + pred_iou_thresh: 0.88 + stability_score_thresh: 0.95 + stability_score_offset: 1.0 + points_per_batch: 16 + num_workers: 0 + in_memory: True + chip_nms_thr: 0.7 + mask_nms_thr: 0.5 +tasks: + preprocess_s2: + workflow: data_ingestion/sentinel2/preprocess_s2 + parameters: + pc_key: "@from(pc_key)" + s2_automatic_segmentation: + workflow: ml/segment_anything/automatic_segmentation + parameters: + model_type: "@from(model_type)" + band_names: ["R", "G", "B"] + band_scaling: null + band_offset: null + spatial_overlap: "@from(spatial_overlap)" + points_per_side: "@from(points_per_side)" + n_crop_layers: "@from(n_crop_layers)" + crop_overlap_ratio: "@from(crop_overlap_ratio)" + crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" + pred_iou_thresh: "@from(pred_iou_thresh)" + stability_score_thresh: "@from(stability_score_thresh)" + stability_score_offset: "@from(stability_score_offset)" + points_per_batch: "@from(points_per_batch)" + num_workers: "@from(num_workers)" + in_memory: "@from(in_memory)" + chip_nms_thr: "@from(chip_nms_thr)" + mask_nms_thr: "@from(mask_nms_thr)" +edges: + - origin: preprocess_s2.raster + destination: + - s2_automatic_segmentation.input_raster +description: + short_description: >- + Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) automatic segmentation over + them. 
+ long_description: >- + The workflow retrieves the relevant Sentinel-2 products with the Planetary Computer (PC) API, + and splits the input rasters into chips of 1024x1024 pixels with an overlap defined by + `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid is defined + within each chip, with each point being used as a prompt for the segmentation. Each point is + used to generate a mask, and the masks are combined using multiple non-maximal suppression + steps to generate the final segmentation mask. Before running the workflow, make sure the model + has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. + The script will download the desired model weights from SAM repository, export the image encoder + and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + user_input: Time range and geometry of interest. + sinks: + s2_raster: Sentinel-2 rasters used as input for the segmentation. + segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/segmentation/segment_basemap.yaml b/workflows/farm_ai/segmentation/segment_basemap.yaml new file mode 100644 index 00000000..ce5778af --- /dev/null +++ b/workflows/farm_ai/segmentation/segment_basemap.yaml @@ -0,0 +1,57 @@ +name: segment_basemap +sources: + user_input: + - basemap_download.input_geometry + - basemap_segmentation.input_geometry + prompts: + - basemap_segmentation.input_prompts +sinks: + basemap: basemap_download.merged_basemap + segmentation_mask: basemap_segmentation.segmentation_mask +parameters: + bingmaps_api_key: + basemap_zoom_level: 14 + model_type: vit_b + spatial_overlap: 0.5 +tasks: + basemap_download: + workflow: data_ingestion/bing/basemap_download_merge + parameters: + api_key: "@from(bingmaps_api_key)" + zoom_level: "@from(basemap_zoom_level)" + basemap_segmentation: + workflow: ml/segment_anything/prompt_segmentation + parameters: + model_type: "@from(model_type)" + band_names: ["red", "green", "blue"] + band_scaling: null + band_offset: null + spatial_overlap: "@from(spatial_overlap)" +edges: + - origin: basemap_download.merged_basemap + destination: + - basemap_segmentation.input_raster +description: + short_description: >- + Downloads basemap with BingMaps API and runs Segment Anything Model (SAM) over them with points and/or + bounding boxes as prompts. + long_description: >- + The workflow lists and downloads basemaps tiles with BingMaps API, and merges them into a + single raster. The raster is then split into chips of 1024x1024 pixels with an overlap defined + by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, + followed by prompt encoder and mask decoder. Before running the workflow, make sure the model + has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. 
+ The script will download the desired model weights from SAM repository, export the image encoder + and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + user_input: Time range and geometry of interest. + prompts: >- + ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with + coordinates, label (foreground/background) and prompt id (in case the raster contains + multiple entities that should be segmented in a single workflow run). + sinks: + basemap: Merged basemap used as input to the segmentation. + segmentation_mask: Output segmentation masks. diff --git a/workflows/farm_ai/segmentation/segment_s2.yaml b/workflows/farm_ai/segmentation/segment_s2.yaml new file mode 100644 index 00000000..5980d2bd --- /dev/null +++ b/workflows/farm_ai/segmentation/segment_s2.yaml @@ -0,0 +1,55 @@ +name: segment_s2 +sources: + user_input: + - preprocess_s2.user_input + - s2_segmentation.input_geometry + prompts: + - s2_segmentation.input_prompts +sinks: + s2_raster: preprocess_s2.raster + segmentation_mask: s2_segmentation.segmentation_mask +parameters: + model_type: vit_b + spatial_overlap: 0.5 + pc_key: +tasks: + preprocess_s2: + workflow: data_ingestion/sentinel2/preprocess_s2 + parameters: + pc_key: "@from(pc_key)" + s2_segmentation: + workflow: ml/segment_anything/prompt_segmentation + parameters: + model_type: "@from(model_type)" + band_names: ["R", "G", "B"] + band_scaling: null + band_offset: null + spatial_overlap: "@from(spatial_overlap)" +edges: + - origin: preprocess_s2.raster + destination: + - s2_segmentation.input_raster +description: + short_description: >- + Downloads Sentinel-2 imagery and runs Segment Anything Model (SAM) over them with points and/or + bounding boxes as prompts. 
+ long_description: >- + The workflow retrieves the relevant Sentinel-2 products with the Planetary Computer (PC) API, + and splits the input rasters into chips of 1024x1024 pixels with an overlap defined by + `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image encoder, + followed by prompt encoder and mask decoder. Before running the workflow, make sure the model + has been imported into the cluster by running `scripts/export_prompt_segmentation_models.py`. + The script will download the desired model weights from SAM repository, export the image encoder + and mask decoder to ONNX format, and add them to the cluster. For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + user_input: Time range and geometry of interest. + prompts: >- + ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with + coordinates, label (foreground/background) and prompt id (in case, the raster contains + multiple entities that should be segmented in a single workflow run). + sinks: + s2_raster: Sentinel-2 rasters used as input for the segmentation. + segmentation_mask: Output segmentation masks. 
diff --git a/workflows/farm_ai/sensor/optimal_locations.yaml b/workflows/farm_ai/sensor/optimal_locations.yaml new file mode 100644 index 00000000..c104deff --- /dev/null +++ b/workflows/farm_ai/sensor/optimal_locations.yaml @@ -0,0 +1,58 @@ +name: optimal_locations +sources: + user_input: + - find_samples.user_input + input_raster: + - compute_index.raster +sinks: + result: find_samples.locations +parameters: + n_clusters: + sieve_size: + index: +tasks: + compute_index: + workflow: data_processing/index/index + parameters: + index: "@from(index)" + find_samples: + op: find_soil_sample_locations + op_dir: minimum_samples + parameters: + n_clusters: "@from(n_clusters)" + sieve_size: "@from(sieve_size)" +edges: + - origin: compute_index.index_raster + destination: + - find_samples.raster +description: + short_description: Identify optimal locations by performing clustering operation using Gaussian Mixture model on computed raster indices. + long_description: |- + The clustering operation separate computed raster indices values into n groups of equal variance, each group assigned a location and that location is considered as a + optimal locations. The sample locations generated provide information of latitude and longitude. The optimal location can be utilized to install sensors and collect + soil information. The index parameter used as input to run the computed index workflow internally using the input raster submitted. The selection of index parameter varies + based on requirement. The workflow supports all the indices supported by spyndex library (https://github.com/awesome-spectral-indices/awesome-spectral-indices#vegetation). + Below provided various indices that are used to identify optimal locations and generated a nutrients heatmap. + Enhanced Vegetation Index (EVI) - EVI is designed to minimize the influence of soil brightness and atmospheric conditions on vegetation assessment. It is calculated + using the red, blue, and near-infrared (NIR) bands. 
EVI is particularly useful for monitoring vegetation in regions with high canopy cover and in areas where atmospheric + interference is significant. This index is also used in the notebook (notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derives nutrient information for Carbon, Nitrogen, + and Phosphorus. + Photochemical Reflectance Index (PRI) - It is a vegetation index used to assess the light-use efficiency of plants in terms of photosynthesis and their response to + changes in light conditions, particularly variations in the blue and red parts of the electromagnetic spectrum. This index is also used in the notebook + (notebooks/heatmaps/nutrients_using_neighbors.ipynb) that derives nutrient information for pH. + The number of sample locations generated depends on the input parameters submitted. Tune n_clusters and sieve_size parameters to generate more or fewer location data points. + For a 100 acre farm, + - 20 sample locations are generated using n_clusters=5 and sieve_size=10. + - 30 sample locations are generated using n_clusters=5 and sieve_size=20. + - 80 sample locations are generated using n_clusters=5 and sieve_size=5. + - 130 sample locations are generated using n_clusters=8 and sieve_size=5. + + sources: + input_raster: List of computed raster indices generated using the sentinel 2 satellite imagery. + user_input: DataVibe with time range information. + sinks: + result: Zip file containing sample locations in a shape file (.shp) format. + parameters: + n_clusters: number of clusters used to generate sample locations. + sieve_size: Group the nearest neighbor pixel values. + index: Index used to generate sample locations. 
diff --git a/workflows/farm_ai/water/irrigation_classification.yaml b/workflows/farm_ai/water/irrigation_classification.yaml new file mode 100644 index 00000000..462c230c --- /dev/null +++ b/workflows/farm_ai/water/irrigation_classification.yaml @@ -0,0 +1,140 @@ +name: irrigation_classification +sources: + user_input: + - landsat.user_input + - merge_geom_time_range.time_range +sinks: + landsat_bands: landsat.raster + ndvi: ndvi.index + cloud_water_mask: cloud_water_mask.cloud_water_mask + dem: match_dem.match_rasters + evaporative_fraction: evaporative_fraction.evaporative_fraction + ngi: ngi_egi_layers.ngi + egi: ngi_egi_layers.egi + lst: ngi_egi_layers.lst + irrigation_probability: irrigation_probability.irrigation_probability +parameters: + ndvi_threshold: 0.0 + ndvi_hot_threshold: 0.02 + coef_ngi: -0.50604148 + coef_egi: -0.93103156 + coef_lst: -0.14612046 + intercept: 1.99036986 + dem_resolution: 30 + dem_provider: CopernicusDEM30 + pc_key: +tasks: + landsat: + workflow: data_ingestion/landsat/preprocess_landsat + parameters: + pc_key: "@from(pc_key)" + ndvi: + op: compute_index + merge_geom: + op: merge_geometries + merge_geom_time_range: + op: merge_geometry_and_time_range + cloud_water_mask: + op: compute_cloud_water_mask + parameters: + ndvi_threshold: "@from(ndvi_threshold)" + dem: + workflow: data_ingestion/dem/download_dem + parameters: + resolution: "@from(dem_resolution)" + provider: "@from(dem_provider)" + match_dem: + workflow: data_processing/merge/match_merge_to_ref + evaporative_fraction: + op: compute_evaporative_fraction + parameters: + ndvi_hot_threshold: "@from(ndvi_hot_threshold)" + ngi_egi_layers: + op: compute_ngi_egi_layers + irrigation_probability: + op: compute_irrigation_probability + parameters: + coef_ngi: "@from(coef_ngi)" + coef_egi: "@from(coef_egi)" + coef_lst: "@from(coef_lst)" + intercept: "@from(intercept)" +edges: + - origin: landsat.raster + destination: + - merge_geom.items + - ndvi.raster + - 
cloud_water_mask.landsat_raster + - match_dem.ref_rasters + - evaporative_fraction.landsat_raster + - ngi_egi_layers.landsat_raster + - irrigation_probability.landsat_raster + - origin: ndvi.index + destination: + - cloud_water_mask.ndvi_raster + - evaporative_fraction.ndvi_raster + - ngi_egi_layers.ndvi_raster + - origin: merge_geom.merged + destination: + - merge_geom_time_range.geometry + - origin: merge_geom_time_range.merged + destination: + - dem.user_input + - origin: dem.raster + destination: + - match_dem.rasters + - origin: match_dem.match_rasters + destination: + - evaporative_fraction.dem_raster + - origin: evaporative_fraction.evaporative_fraction + destination: + - ngi_egi_layers.evaporative_fraction + - origin: cloud_water_mask.cloud_water_mask + destination: + - evaporative_fraction.cloud_water_mask_raster + - ngi_egi_layers.cloud_water_mask_raster + - irrigation_probability.cloud_water_mask_raster + - origin: ngi_egi_layers.ngi + destination: + - irrigation_probability.ngi + - origin: ngi_egi_layers.egi + destination: + - irrigation_probability.egi + - origin: ngi_egi_layers.lst + destination: + - irrigation_probability.lst +description: + short_description: Develops 30m pixel-wise irrigation probability map. + long_description: + The workflow retrieves LANDSAT 8 Surface Reflectance (SR) image tile and land surface elevation DEM data, + and runs four ops to compute irrigation probability map. The land surface elevation data source are 10m + USGS DEM, or 30m Copernicus DEM; but Copernicus DEM is set as the default source in the workflow. Landsat Op + compute_cloud_water_mask utilizes the qa_pixel band of image and NDVI index to generate mask of cloud cover and + water bodies. Op compute_evaporative_fraction utilizes NDVI index, land surface temperature (LST), green and + near infra-red bands, and DEM data to estimate evaporative flux (ETRF). 
Op compute_ngi_egi_layers utilizes NDVI index, + ETRF estimates, green and near infra-red bands to generate NGI and EGI irrigation layers. Lastly op + compute_irrigation_probability uses NGI and EGI layers along with LST band; and applies optimized logistic regression + model to compute 30m pixel-wise irrigation probability map. The coefficients and intercept of the model were obtained + beforehand using ground-truth data from Nebraska state, USA for the year 2015. + sources: + user_input: Time range and geometry of interest. + sinks: + landsat_bands: Raster of Landsat bands. + ndvi: NDVI raster. + cloud_water_mask: Mask of cloud cover and water bodies. + dem: DEM raster. Options are CopernicusDEM30 and USGS3DEP. + evaporative_fraction: Raster with estimates of evaporative fraction flux. + ngi: Raster of NGI irrigation layer. + egi: Raster of EGI irrigation layer. + lst: Raster of land surface temperature. + irrigation_probability: Raster of irrigation probability map in 30m resolution. + parameters: + ndvi_threshold: NDVI index threshold value for masking water bodies. + ndvi_hot_threshold: Maximum NDVI index threshold value for selecting hot pixel. + coef_ngi: Coefficient of NGI layer in optimized logistic regression model. + coef_egi: Coefficient of EGI layer in optimized logistic regression model. + coef_lst: Coefficient of land surface temperature band in optimized logistic regression model. + intercept: Intercept value of optimized logistic regression model. + pc_key: Optional Planetary Computer API key. 
+ + + diff --git a/workflows/forest_ai/deforestation/alos_trend_detection.yaml b/workflows/forest_ai/deforestation/alos_trend_detection.yaml new file mode 100644 index 00000000..5aaa6d92 --- /dev/null +++ b/workflows/forest_ai/deforestation/alos_trend_detection.yaml @@ -0,0 +1,61 @@ +name: alos_trend_detection +sources: + user_input: + - alos_forest_extent_download_merge.user_input + - ordinal_trend_detection.input_geometry +sinks: + merged_raster: alos_forest_extent_download_merge.merged_raster + categorical_raster: alos_forest_extent_download_merge.categorical_raster + recoded_raster: ordinal_trend_detection.recoded_raster + clipped_raster: ordinal_trend_detection.clipped_raster + trend_test_result: ordinal_trend_detection.trend_test_result +parameters: + pc_key: + from_values: [4, 3, 0, 2, 1] + to_values: [0, 0, 0, 1, 1] +tasks: + alos_forest_extent_download_merge: + workflow: data_ingestion/alos/alos_forest_extent_download_merge + parameters: + pc_key: "@from(pc_key)" + ordinal_trend_detection: + workflow: forest_ai/deforestation/ordinal_trend_detection + parameters: + from_values: "@from(from_values)" + to_values: "@from(to_values)" +edges: + - origin: alos_forest_extent_download_merge.merged_raster + destination: + - ordinal_trend_detection.raster +description: + short_description: + Detects increase/decrease trends in forest pixel levels over the user-input geometry and time range for the ALOS forest map. + long_description: + This workflow combines the alos_forest_extent_download_merge and + ordinal_trend_detection workflows to detect increase/decrease trends in the + forest pixel levels over the user-provided geometry and time range for the + ALOS forest map. The ALOS PALSAR 2.1 Forest/Non-Forest Maps are downloaded + in the alos_forest_extent_download_merge workflow. 
Then the + ordinal_trend_detection workflow clips the ordinal raster to the + user-provided geometry and time range and determines if there is an + increasing or decreasing trend in the forest pixel levels over them. + alos_trend_detection uses the Cochran-Armitage test to detect trends in the + forest levels over the years. The null hypothesis is that there is no trend + in the pixel levels over the list of rasters. The alternative hypothesis is + that there is a trend in the forest pixel levels over the list of rasters + (one for each year). It returns a p-value and a z-score. If the p-value is + less than some significance level, the null hypothesis is rejected and the + alternative hypothesis is accepted. If the z-score is positive, the trend is + increasing. If the z-score is negative, the trend is decreasing. + sources: + user_input: Time range and geometry of interest. + sinks: + merged_raster: Merged raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. + categorical_raster: Categorical raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range before the merge operation. + recoded_raster: Recoded raster of the ALOS PALSAR 2.1 Forest/Non-Forest Map for the user-provided geometry and time range. + clipped_raster: Clipped ordinal raster for the user-provided geometry and time range. + trend_test_result: Cochran-armitage test results composed of p-value and z-score. + parameters: + pc_key: Planetary Computer API key. + from_values: Values to recode from. + to_values: Values to recode to. 
\ No newline at end of file diff --git a/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml b/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml new file mode 100644 index 00000000..821fee96 --- /dev/null +++ b/workflows/forest_ai/deforestation/ordinal_trend_detection.yaml @@ -0,0 +1,62 @@ +name: ordinal_trend_detection +sources: + raster: + - recode_raster.raster + input_geometry: + - clip.input_geometry +sinks: + recoded_raster: recode_raster.recoded_raster + trend_test_result: trend_test.ordinal_trend_result + clipped_raster: clip.clipped_raster +parameters: + from_values: [] + to_values: [] +tasks: + recode_raster: + op: recode_raster + parameters: + from_values: "@from(from_values)" + to_values: "@from(to_values)" + clip: + workflow: data_processing/clip/clip + compute_pixel_count: + op: compute_pixel_count + trend_test: + op: ordinal_trend_test +edges: + - origin: recode_raster.recoded_raster + destination: + - clip.raster + - origin: clip.clipped_raster + destination: + - compute_pixel_count.raster + - origin: compute_pixel_count.pixel_count + destination: + - trend_test.pixel_count +description: + short_description: + Detects increase/decrease trends in the pixel levels over the user-input geometry and time range. + long_description: + This workflow prepares rasters to perform the Cochran-Armitage trend test + over a user-provided geometry and time range. Initially, it recodes the + input raster according to the 'from_values' and 'to_values' parameters. For + example, if the original raster has values (2, 1, 3, 4, 5) and the default + values of 'from_values' and 'to_values' are respectively [1, 2, 3, 4, 5] and + [6, 7, 8, 9, 10], the recoded raster will have values (7, 6, 8, 9, 10). The + workflow then clips the user-provided geometries and computes an ordinal + raster. It also counts each unique pixel present in the recoded rasters to + create a pixel frequency contingency table. 
This data is used to determine + if there is an increasing or decreasing trend in pixel levels. The + Cochran-Armitage test is a non-parametric test used to ascertain this trend. + The null hypothesis assumes no trend in pixel levels, while the alternative + hypothesis assumes a trend exists. The test returns a p-value and a z-score. + If the p-value is less than some significance level, the null hypothesis is + rejected in favor of the alternative. A positive z-score indicates an + increasing trend, while a negative one indicates a decreasing trend. + sources: + raster: Raster to be processed and tested for trends. + input_geometry: Reference geometry. + sinks: + recoded_raster: Recoded raster for the user-provided geometry and time range. + trend_test_result: Cochran-armitage test results composed of p-value and z-score. + clipped_raster: Clipped ordinal raster for the user-provided geometry and time range. \ No newline at end of file diff --git a/workflows/helloworld.yaml b/workflows/helloworld.yaml new file mode 100644 index 00000000..eef79e9d --- /dev/null +++ b/workflows/helloworld.yaml @@ -0,0 +1,18 @@ +name: helloworld +sources: + user_input: + - hello.user_input +sinks: + raster: hello.raster +tasks: + hello: + op: helloworld +description: + short_description: Hello world! + long_description: + Small test workflow that generates an image of the Earth with countries that intersect with the + input geometry highlighted in orange. + sources: + user_input: Input geometry. + sinks: + raster: Raster with highlighted countries. 
diff --git a/workflows/ml/crop_segmentation.yaml b/workflows/ml/crop_segmentation.yaml new file mode 100644 index 00000000..58319f6a --- /dev/null +++ b/workflows/ml/crop_segmentation.yaml @@ -0,0 +1,58 @@ +name: crop_segmentation +sources: + user_input: + - spaceeye.user_input +sinks: + segmentation: inference.output_raster +parameters: + pc_key: + model_file: + model_bands: 37 +tasks: + spaceeye: + workflow: data_ingestion/spaceeye/spaceeye_interpolation + parameters: + pc_key: "@from(pc_key)" + ndvi: + workflow: data_processing/index/index + parameters: + index: ndvi + group: + op: select_sequence_from_list + op_dir: select_sequence + parameters: + num: "@from(model_bands)" + criterion: regular + inference: + op: compute_onnx_from_sequence + op_dir: compute_onnx + parameters: + model_file: "@from(model_file)" + window_size: 256 + overlap: .25 + num_workers: 4 +edges: + - origin: spaceeye.raster + destination: + - ndvi.raster + - origin: ndvi.index_raster + destination: + - group.rasters + - origin: group.sequence + destination: + - inference.input_raster +description: + short_description: + Runs a crop segmentation model based on NDVI from SpaceEye imagery along the year. + long_description: + The workflow generates SpaceEye cloud-free data for the input region and time range and computes + NDVI over those. NDVI values sampled regularly along the year are stacked as bands and used as + input to the crop segmentation model. + sources: + user_input: Time range and geometry of interest. + sinks: + segmentation: Crop segmentation map at 10m resolution. + parameters: + pc_key: Optional Planetary Computer API key. + model_file: Path to the ONNX file containing the model architecture and weights. + model_bands: Number of NDVI bands to stack as the model input. 
diff --git a/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml b/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml new file mode 100644 index 00000000..d92da847 --- /dev/null +++ b/workflows/ml/dataset_generation/datagen_crop_segmentation.yaml @@ -0,0 +1,38 @@ +name: datagen_crop_segmentation +sources: + user_input: + - spaceeye.user_input + - cdl.user_input +sinks: + ndvi: ndvi.index_raster + cdl: cdl.raster +parameters: + pc_key: +tasks: + spaceeye: + workflow: data_ingestion/spaceeye/spaceeye_interpolation + parameters: + pc_key: "@from(pc_key)" + ndvi: + workflow: data_processing/index/index + parameters: + index: ndvi + cdl: + workflow: data_ingestion/cdl/download_cdl +edges: + - origin: spaceeye.raster + destination: + - ndvi.raster +description: + short_description: + Generates a dataset for crop segmentation, based on NDVI raster and Crop Data Layer (CDL) maps. + long_description: + The workflow generates SpaceEye cloud-free data for the input region and time range and computes + NDVI over those. It also downloads CDL maps for the years comprised in the time range. + sources: + user_input: Time range and geometry of interest. + sinks: + ndvi: NDVI rasters. + cdl: CDL map for the years comprised in the input time range. + parameters: + pc_key: Optional Planetary Computer API key. 
diff --git a/workflows/ml/driveway_detection.yaml b/workflows/ml/driveway_detection.yaml new file mode 100644 index 00000000..1b45f18f --- /dev/null +++ b/workflows/ml/driveway_detection.yaml @@ -0,0 +1,62 @@ +name: driveway_detection +sources: + input_raster: + - segment.input_raster + - detect.input_raster + - osm.user_input + property_boundaries: + - detect.property_boundaries +sinks: + properties: detect.properties_with_driveways + driveways: detect.driveways +parameters: + min_region_area: + ndvi_thr: + car_size: + num_kernels: + car_thr: +tasks: + segment: + op: segment_driveway + osm: + workflow: data_ingestion/osm_road_geometries + parameters: + network_type: drive_service + buffer_size: 100 + detect: + op: detect_driveway + parameters: + min_region_area: "@from(min_region_area)" + ndvi_thr: "@from(ndvi_thr)" + car_size: "@from(car_size)" + num_kernels: "@from(num_kernels)" + car_thr: "@from(car_thr)" +edges: + - origin: segment.segmentation_raster + destination: + - detect.segmentation_raster + - origin: osm.roads + destination: + - detect.roads +description: + short_description: Detects driveways in front of houses. + long_description: + The workflow downloads road geometry from Open Street Maps and segments the front of houses in + the input image using a machine learning model. It then uses the input image, segmentation map, + road geometry, and input property boundaries to detect the presence of driveways in the front of + each house. + sources: + input_raster: Aerial imagery of the region of interest with RBG + NIR bands. + property_boundaries: Property boundary information for the region of interest. + sinks: + properties: Boundaries of properties that contain a driveway. + driveways: Regions of each property boundary where a driveway was detected. + parameters: + min_region_area: + Minimum contiguous region that will be considered as a potential driveway, in meters. + ndvi_thr: Only areas under this NDVI threshold will be considered for driveways. 
+ car_size: Expected size of a car, in pixels, defined as [height, width]. + num_kernels: Number of rotated kernels to try to fit a car inside a potential driveway region. + car_thr: + Ratio of pixels of a kernel that have to be inside a region in order to consider it a parkable + spot. diff --git a/workflows/ml/segment_anything/automatic_segmentation.yaml b/workflows/ml/segment_anything/automatic_segmentation.yaml new file mode 100644 index 00000000..54c76a56 --- /dev/null +++ b/workflows/ml/segment_anything/automatic_segmentation.yaml @@ -0,0 +1,82 @@ +name: automatic_segmentation +sources: + input_raster: + - clip.raster + input_geometry: + - clip.input_geometry +sinks: + segmentation_mask: combine_masks.output_mask +parameters: + model_type: vit_b + band_names: null + band_scaling: null + band_offset: null + spatial_overlap: 0.5 + points_per_side: 16 + n_crop_layers: 0 + crop_overlap_ratio: 0.0 + crop_n_points_downscale_factor: 1 + pred_iou_thresh: 0.88 + stability_score_thresh: 0.95 + stability_score_offset: 1.0 + points_per_batch: 16 + num_workers: 0 + in_memory: True + chip_nms_thr: 0.7 + mask_nms_thr: 0.5 +tasks: + clip: + workflow: data_processing/clip/clip + sam_inference: + op: automatic_segmentation + op_dir: segment_anything + parameters: + model_type: "@from(model_type)" + band_names: "@from(band_names)" + band_scaling: "@from(band_scaling)" + band_offset: "@from(band_offset)" + spatial_overlap: "@from(spatial_overlap)" + points_per_side: "@from(points_per_side)" + n_crop_layers: "@from(n_crop_layers)" + crop_overlap_ratio: "@from(crop_overlap_ratio)" + crop_n_points_downscale_factor: "@from(crop_n_points_downscale_factor)" + pred_iou_thresh: "@from(pred_iou_thresh)" + stability_score_thresh: "@from(stability_score_thresh)" + stability_score_offset: "@from(stability_score_offset)" + points_per_batch: "@from(points_per_batch)" + num_workers: "@from(num_workers)" + in_memory: "@from(in_memory)" + combine_masks: + op: combine_sam_masks + op_dir: 
segment_anything_combine_masks + parameters: + chip_nms_thr: "@from(chip_nms_thr)" + mask_nms_thr: "@from(mask_nms_thr)" +edges: + - origin: clip.clipped_raster + destination: + - sam_inference.input_raster + - origin: sam_inference.segmented_chips + destination: + - combine_masks.input_masks +description: + short_description: >- + Runs a Segment Anything Model (SAM) automatic segmentation over input rasters. + long_description: >- + The workflow splits the input rasters into chips of 1024x1024 pixels with an overlap + defined by `spatial_overlap`. Each chip is processed by SAM's image encoder, and a point grid + is defined within each chip, with each point being used as a prompt for the segmentation. + Each point is used to generate a mask, and the masks are combined using multiple non-maximal + suppression steps to generate the final segmentation mask. Before running the workflow, make + sure the model has been imported into the cluster by running + `scripts/export_prompt_segmentation_models.py`. The script will download the desired model + weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add + them to the cluster. For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + input_raster: Rasters used as input for the segmentation. + input_geometry: Geometry of interest within the raster for the segmentation. + sinks: + segmentation_mask: Output segmentation masks. 
diff --git a/workflows/ml/segment_anything/prompt_segmentation.yaml b/workflows/ml/segment_anything/prompt_segmentation.yaml new file mode 100644 index 00000000..03d587e8 --- /dev/null +++ b/workflows/ml/segment_anything/prompt_segmentation.yaml @@ -0,0 +1,61 @@ +name: prompt_segmentation +sources: + input_raster: + - clip.raster + input_geometry: + - clip.input_geometry + input_prompts: + - ingest_points.user_input +sinks: + segmentation_mask: sam_inference.segmentation_mask +parameters: + model_type: vit_b + band_names: null + band_scaling: null + band_offset: null + spatial_overlap: 0.5 +tasks: + ingest_points: + workflow: data_ingestion/user_data/ingest_geometry + clip: + workflow: data_processing/clip/clip + sam_inference: + op: prompt_segmentation + op_dir: segment_anything + parameters: + model_type: "@from(model_type)" + band_names: "@from(band_names)" + band_scaling: "@from(band_scaling)" + band_offset: "@from(band_offset)" + spatial_overlap: "@from(spatial_overlap)" +edges: + - origin: ingest_points.geometry + destination: + - sam_inference.input_prompts + - origin: clip.clipped_raster + destination: + - sam_inference.input_raster +description: + short_description: >- + Runs Segment Anything Model (SAM) over input rasters with points and/or bounding boxes + as prompts. + long_description: >- + The workflow splits the input input rasters into chips of 1024x1024 pixels with an overlap + defined by `spatial_overlap`. Chips intersecting with prompts are processed by SAM's image + encoder, followed by prompt encoder and mask decoder. Before running the workflow, make sure + the model has been imported into the cluster by running + `scripts/export_prompt_segmentation_models.py`. The script will download the desired model + weights from SAM repository, export the image encoder and mask decoder to ONNX format, and add + them to the cluster. 
For more information, refer to the + [FarmVibes.AI + troubleshooting](https://microsoft.github.io/farmvibes-ai/docfiles/markdown/TROUBLESHOOTING.html) + page in the documentation. + sources: + input_geometry: Geometry of interest within the raster for the segmentation. + input_raster: Rasters used as input for the segmentation. + input_prompts: >- + ExternalReferences to the point and/or bounding box prompts. These are GeoJSON with + coordinates, label (foreground/background) and prompt id (in case, the raster contains + multiple entities that should be segmented in a single workflow run). + sinks: + segmentation_mask: Output segmentation masks. diff --git a/workflows/ml/spectral_extension.yaml b/workflows/ml/spectral_extension.yaml new file mode 100644 index 00000000..d80eca76 --- /dev/null +++ b/workflows/ml/spectral_extension.yaml @@ -0,0 +1,68 @@ +name: spectral_extension +sources: + raster: + - ingest_raster.input_ref +sinks: + s2_rasters: s2.raster + matched_raster: match.output_raster + extended_raster: compute_onnx.output_raster +parameters: + resampling: nearest +tasks: + ingest_raster: + op: download_raster_from_ref + op_dir: download_from_ref + s2: + workflow: data_ingestion/sentinel2/preprocess_s2 + select: + op: select_sequence_from_list + op_dir: select_sequence + parameters: + num: 1 + criterion: first + match: + op: match_raster_to_ref + parameters: + resampling: "@from(resampling)" + sequence: + op: create_raster_sequence + compute_onnx: + op: compute_onnx_from_sequence + op_dir: compute_onnx + parameters: + model_file: /opt/terravibes/ops/resources/spectral_extension_model/spectral_extension.onnx + nodata: 0 +edges: + - origin: ingest_raster.downloaded + destination: + - s2.user_input + - match.ref_raster + - sequence.rasters1 + - origin: s2.raster + destination: + - select.rasters + - origin: select.sequence + destination: + - match.raster + - origin: match.output_raster + destination: + - sequence.rasters2 + - origin: sequence.sequence + 
destination: + - compute_onnx.input_raster +description: + short_description: Generates high-resolution Sentinel-2 bands by combining UAV and Sentinel-2 data. + long_description: + The workflow will download a user-specified UAV raster, download and resample the corresponding + Sentinel-2 raster, and run the spectral extension model to generate 8 Sentinel-2 bands + at 0.125m resolution. + The input raster should contain three bands (RGB) at 0.125m/px resolution in the range 0-255. + sources: + raster: + The UAV input raster with three bands (red, green, blue, in this order) at 0.125m resolution. + sinks: + s2_rasters: The original Sentinel-2 raster used in the spectral extension. + matched_raster: Sentinel-2 data resampled to the UAV raster's grid (low-resolution). + extended_raster: The generated raster, containing 8 of the 12 Sentinel-2 bands. + parameters: + resampling: Resampling to use when reprojecting the Sentinel-2 data into the UAV raster's grid. \ No newline at end of file From 75ef72cfe2ac61c643576c37dce673cfee211375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20Estev=C3=A3o?= Date: Tue, 16 Jul 2024 14:21:03 +0000 Subject: [PATCH 02/13] Add workflow for linting and testing (#179) Add workflow for linting, static type checking, and testing. 
- Check each package individually - Check ops and integration tests - Check docstrings for `vibe_core` --- .github/workflows/cluster-build.yml | 11 ++- .github/workflows/lint-test.yml | 129 ++++++++++++++++++++++++++++ scripts/export_sam_models.py | 11 +++ 3 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/lint-test.yml diff --git a/.github/workflows/cluster-build.yml b/.github/workflows/cluster-build.yml index 5ab517e8..4b84cf20 100644 --- a/.github/workflows/cluster-build.yml +++ b/.github/workflows/cluster-build.yml @@ -1,6 +1,15 @@ name: Build FarmVibes.AI cluster run-name: Cluster build and helloworld test -on: [push, pull_request, workflow_dispatch] +on: + push: + branches: + - dev + - main + pull_request: + branches: + - dev + - main + workflow_dispatch: env: FARMVIBES_AI_SKIP_DOCKER_FREE_SPACE_CHECK: yes jobs: diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml new file mode 100644 index 00000000..0698d743 --- /dev/null +++ b/.github/workflows/lint-test.yml @@ -0,0 +1,129 @@ +name: Linting and testing +on: + push: + branches: + - dev + - main + pull_request: + branches: + - dev + - main + workflow_dispatch: + +env: + PYRIGHT_PYTHON_FORCE_VERSION: 1.1.268 + +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Install dependencies + run: | + pip install wheel setuptools + - name: Build packages + run: | + for pkg in vibe_core vibe_common vibe_agent vibe_server vibe_dev; do cd src/$pkg && python setup.py bdist_wheel --dist-dir ../../dist; cd ../../; done + - name: Save packages + uses: actions/upload-artifact@v4 + with: + name: packages + path: dist + test: + needs: build + runs-on: ubuntu-latest + strategy: + 
fail-fast: true + matrix: + package-to-test: [vibe_core, vibe_common, vibe_server, vibe_agent] + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Retrieve packages + uses: actions/download-artifact@v4 + with: + name: packages + path: dist + - name: Install dependencies + run: | + pip install pyright ruff + - name: Install package + run: | + pip install ${{ matrix.package-to-test }}[test] --find-links dist + - name: Lint with ruff + run: | + ruff check ./src/${{ matrix.package-to-test }} --config ./.ruff.toml + - name: Type checking with pyright + run: | + pyright ./src/${{ matrix.package-to-test }} + - name: Test with pytest + run: | + pip install vibe_dev --find-links dist + pytest ./src/${{ matrix.package-to-test}} -v --junitxml=junit/test-results.xml --cov=. --cov-report=xml + + ops-test: + runs-on: ubuntu-latest + container: + image: mcr.microsoft.com/farmai/terravibes/worker-base:12380 + steps: + - uses: actions/checkout@v4 + - name: Install dependencies + run: | + pip install pyright ruff + - name: Setup op resources + run: | + mkdir -p /opt/terravibes/ops + ln -sf $(pwd)/op_resources /opt/terravibes/ops/resources + mkdir /app + ln -sf $(pwd)/ops /app/ops + ln -sf $(pwd)/workflows /app/workflows + - name: Install packages + run: | + pip install ./src/vibe_core + pip install ./src/vibe_common + pip install ./src/vibe_agent + pip install ./src/vibe_server + pip install ./src/vibe_lib + pip install ./src/vibe_dev + - name: Linting ops + run: | + ruff check ./ops --config ./.ruff.toml + - name: Type checking ops + run: | + pyright ./ops + - name: Get SAM model + run: | + pip install git+https://github.com/facebookresearch/segment-anything.git + mkdir -p /mnt/onnx_resources + python -c "from scripts.export_sam_models import dev; dev()" + - name: Run integration tests + run: | + pytest ./src/vibe_lib ./ops ./src/tests -v --durations=0 --full-trace --junitxml=test-output.xml 
+ check-docstrings: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Install ruff + run: | + pip install ruff + - name: "Check docstrings for vibe_core" + run: | + ruff check --select D,D401 --ignore D105 --force-exclude --exclude src/vibe_core/vibe_core/farmvibes_ai_hello_world.py --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/*.py + - name: "Check docstrings for vibe_core/data" + run: | + ruff check --select D,D401 --ignore D105 --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/data/*.py \ No newline at end of file diff --git a/scripts/export_sam_models.py b/scripts/export_sam_models.py index dbd8f26c..2f2aaf4d 100644 --- a/scripts/export_sam_models.py +++ b/scripts/export_sam_models.py @@ -263,5 +263,16 @@ def main(): add_to_cluster(exported_paths, args.cluster) + +def dev(): + model_type = "vit_b" + out_path = "/mnt/onnx_resources/" + with TemporaryDirectory() as tmp_dir: + model_url = MODELS[model_type].url + downloaded_path = download_file(model_url, os.path.join(tmp_dir, f"{model_type}.pth")) + export_model(model_type, downloaded_path, out_path) + + + if __name__ == "__main__": main() From b711b42171f85bb824d98c234c6fd2eaa5898c0b Mon Sep 17 00:00:00 2001 From: Naga Bilwanth Gangarapu <82965480+v-ngangarapu@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:13:56 -0700 Subject: [PATCH 03/13] Micro climate prediction using Neighbors (DeepMC) (#185) This code changes are enhancements for deepmc. It help to find the weather forecast for stations having no historical data or missing sensor data. It will add new model created using Pytorch Graphical Neural Network (GNN). The code changes also having enhancements to existing deepmc scripts that add datetime in preprocess output. It help to find data belong to which date while performing GNN model training. 
Co-authored-by: Naga Bilwanth Gangarapu --- notebooks/deepmc/mc_forecast.ipynb | 556 +++++++-------- notebooks/deepmc/notebook_lib/forecast.py | 69 +- notebooks/deepmc/notebook_lib/modules.py | 4 +- notebooks/deepmc/notebook_lib/post_models.py | 34 - notebooks/deepmc/notebook_lib/train.py | 196 +++++- .../notebook_lib/transformer_models_ts.py | 367 ---------- notebooks/deepmc/notebook_lib/utils.py | 104 --- .../deepmc_neighbors_env.yaml | 19 + notebooks/deepmc_neighbors/gnn_forecast.ipynb | 643 ++++++++++++++++++ notebooks/deepmc_neighbors/graph.svg | 1 + .../notebook_lib/base_dataset.py | 318 +++++++++ .../notebook_lib/base_deepmc.py | 47 ++ .../notebook_lib/base_model.py | 154 +++++ .../notebook_lib/base_modules.py | 50 ++ .../notebook_lib/data_utils.py | 241 +++++++ .../notebook_lib/embeddings.py | 235 +++++++ .../notebook_lib/post_deepmc.py | 271 ++++++++ .../notebook_lib/post_deepmc_inference.py | 268 ++++++++ .../deepmc_neighbors/notebook_lib/schema.py | 23 + .../deepmc_neighbors/notebook_lib/train.py | 516 ++++++++++++++ notebooks/deepmc_neighbors/sample_data.csv | 92 +++ .../vibe_notebook/deepmc/__init__.py | 0 .../vibe_notebook/deepmc/forecast.py | 164 +++++ .../vibe_notebook/deepmc}/prediction.py | 29 +- .../vibe_notebook/deepmc}/preprocess.py | 116 +++- .../vibe_notebook/deepmc/utils.py | 200 ++++++ 26 files changed, 3808 insertions(+), 909 deletions(-) delete mode 100644 notebooks/deepmc/notebook_lib/post_models.py delete mode 100644 notebooks/deepmc/notebook_lib/transformer_models_ts.py delete mode 100644 notebooks/deepmc/notebook_lib/utils.py create mode 100644 notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml create mode 100644 notebooks/deepmc_neighbors/gnn_forecast.ipynb create mode 100755 notebooks/deepmc_neighbors/graph.svg create mode 100644 notebooks/deepmc_neighbors/notebook_lib/base_dataset.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/base_deepmc.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/base_model.py 
create mode 100644 notebooks/deepmc_neighbors/notebook_lib/base_modules.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/data_utils.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/embeddings.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/post_deepmc.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/post_deepmc_inference.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/schema.py create mode 100644 notebooks/deepmc_neighbors/notebook_lib/train.py create mode 100644 notebooks/deepmc_neighbors/sample_data.csv create mode 100644 src/vibe_notebook/vibe_notebook/deepmc/__init__.py create mode 100644 src/vibe_notebook/vibe_notebook/deepmc/forecast.py rename {notebooks/deepmc/notebook_lib => src/vibe_notebook/vibe_notebook/deepmc}/prediction.py (90%) rename {notebooks/deepmc/notebook_lib => src/vibe_notebook/vibe_notebook/deepmc}/preprocess.py (64%) create mode 100644 src/vibe_notebook/vibe_notebook/deepmc/utils.py diff --git a/notebooks/deepmc/mc_forecast.ipynb b/notebooks/deepmc/mc_forecast.ipynb index d41b43b7..7fbfa798 100755 --- a/notebooks/deepmc/mc_forecast.ipynb +++ b/notebooks/deepmc/mc_forecast.ipynb @@ -15,7 +15,9 @@ "```bash\n", "$ micromamba env create -f ./deepmc_env.yaml\n", "$ micromamba activate deepmc-pytorch\n", - "```\n" + "```\n", + "\n", + "**We currently only support Unix-based systems (Linux and MacOS) for running this notebook.**" ] }, { @@ -55,33 +57,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/azureuser/.conda/envs/deepmc-pytorch/lib/python3.8/site-packages/torchvision/io/image.py:11: UserWarning: Failed to load image Python extension: /home/azureuser/.conda/envs/deepmc-pytorch/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZNK3c1010TensorImpl36is_contiguous_nondefault_policy_implENS_12MemoryFormatE\n", - " warn(f\"Failed 
to load image Python extension: {e}\")\n" - ] - } - ], + "outputs": [], "source": [ - "import pandas as pd\n", - "import numpy as np\n", + "import warnings\n", + "from datetime import datetime\n", "\n", - "from datetime import datetime, timedelta\n", + "import numpy as np\n", + "import pandas as pd\n", "from matplotlib import pyplot as plt\n", - "\n", - "from shapely import geometry\n", - "\n", - "from notebook_lib import utils\n", - "from notebook_lib import prediction\n", "from notebook_lib import train\n", - "from notebook_lib.forecast import Forecast\n", + "from shapely import geometry\n", + "\n", + "from vibe_notebook.deepmc import prediction, utils\n", + "from vibe_notebook.deepmc.forecast import Forecast\n", "\n", - "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, @@ -90,7 +81,7 @@ "metadata": {}, "source": [ "### Workflows\n", - "The notebook utilize below workflows available in farmvibes" + "The notebook utilizes the workflow below, which is available in FarmVibes.AI: " ] }, { @@ -107,9 +98,9 @@ "metadata": {}, "source": [ "### Data\n", - "The notebook utilizing two types of datasets\n", + "The notebook utilizes two types of datasets:\n", "\n", - "1. The historical observations recorded by weather stations\n", + "1. The historical observations recorded by weather stations.\n", "2. The forecast observations downloaded using the [herbie package](https://blaylockbk.github.io/Herbie/_build/html/). This package helps to download recent and archived numerical weather prediction (NWP) model output from different cloud archive sources. Its most popular capability is to download HRRR model data.\n" ] }, @@ -118,7 +109,7 @@ "metadata": {}, "source": [ "### AGWeatherNet\n", - "In this notebook, we utilize historical observations downloaded from AGWeatherNet for a station \\\"Palouse\\\". The data used for training range from May 2020 to June 2022. For more information check [AGWeatherNet documentation](http://weather.wsu.edu/?p=92850&desktop)." 
+ "In this notebook, we utilize historical observations downloaded from AGWeatherNet for the station `Palouse`. The data used for training range from May 2020 to June 2022. For more information check [AGWeatherNet documentation](http://weather.wsu.edu/?p=92850&desktop)." ] }, { @@ -146,8 +137,8 @@ "metadata": {}, "outputs": [], "source": [ - "PREDICT=\"%s\"\n", - "RELEVANT=\"%s\"\n", + "PREDICT = \"%s\"\n", + "RELEVANT = \"%s\"\n", "ROOT_PATH = f\"./data/model_{PREDICT}/\"\n", "DATA_EXPORT_PATH = ROOT_PATH + f\"{STATION_NAME}/{RELEVANT}/train_data.pkl\"" ] @@ -166,7 +157,7 @@ "outputs": [], "source": [ "# weather dataset filtered and model training limited to train features.\n", - "HISTORICAL_MODEL_TRAIN_FEATURES = ['humidity', 'wind_speed', 'temperature']\n", + "HISTORICAL_MODEL_TRAIN_FEATURES = [\"humidity\", \"wind_speed\", \"temperature\"]\n", "\n", "# Historical data aligned using INDEX variable\n", "INDEX = \"date\"" @@ -203,7 +194,7 @@ "outputs": [], "source": [ "# Models trained to predict out features\n", - "OUT_FEATURES = ['wind_speed' , 'temperature']" + "OUT_FEATURES = [\"wind_speed\", \"temperature\"]" ] }, { @@ -211,14 +202,16 @@ "metadata": {}, "source": [ "### Relevant vs Not Relevant\n", - "The notebook support performing micro climate predictions with below approaches. \n", + "The scenario is considered relevant when there is a close match between historical data and forecasts, with minimal discrepancies else it's not relevant.\n", + "\n", + "The notebook supports performing micro climate predictions with the following approaches: \n", "\n", "1. Utilizing both Historical & Forecast observations. This approach is suggested to use if both observations are relevant.

\n", "\n", "2. Utilizing only Historical dataset. This approach is suggested to use if both Historical & Forecast observations are not relevant or Forecast dataset doesn't exist.

\n", "\n", "\n", - "In next cells, demonstrated training & prediction process for both relevant and not relevant scenarios. " + "In next cells, we demonstrate the training and prediction processes for both relevant and non-relevant scenarios. " ] }, { @@ -270,7 +263,7 @@ "1. The index variable is converted to datetime\n", "2. The input data is interpolated to fill the missing values using their neighbors\n", "3. The script focuses on training the model with a 60-minute frequency, hence the data is grouped for this frequency.\n", - "4. The data is scaled using the scikit-learn StandardScalar. For more information check [scikit-learn documentaion](https://github.com/scikit-learn/scikit-learn)" + "4. The data is scaled using the scikit-learn StandardScalar. For more information check [scikit-learn documentation](https://github.com/scikit-learn/scikit-learn)" ] }, { @@ -296,7 +289,7 @@ "metadata": {}, "outputs": [], "source": [ - "historical_dataset = utils.get_csv_data(path=file_path)" + "historical_dataset = utils.get_csv_data(path=file_path, interpolate=False, fill_na=False)" ] }, { @@ -319,7 +312,7 @@ "  - humidity - \"RH:2 m\"
\n", "  - wind speed - The forecast observations of wind speed are derived using data downloaded for u & v components. The algebraic expression used to calculate wind speed is
\n", " $$ \n", - " ws = \\sqrt{u^2 + v^2}\n", + " ws(u, v) = \\sqrt{u^2 + v^2}\n", " $$\n", "   i. u component - \"UGRD:10 m\"
\n", "   ii. v component - \"VGRD:10 m\"" @@ -348,24 +341,14 @@ "start_date = datetime(year=2020, month=5, day=31)\n", "end_date = datetime(year=2022, month=8, day=2)\n", "time_range = (start_date, end_date)\n", - "date_column=\"date\"\n", + "date_column = \"date\"\n", "\n", - "parameters = [{\n", - " \"weather_type\": \"temperature\",\n", - " \"search_text\": \"TMP:2 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"humidity\",\n", - " \"search_text\": \"RH:2 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"u-component\",\n", - " \"search_text\": \"UGRD:10 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"v-component\",\n", - " \"search_text\": \"VGRD:10 m\"\n", - " }]" + "parameters = [\n", + " {\"weather_type\": \"temperature\", \"search_text\": \"TMP:2 m\"},\n", + " {\"weather_type\": \"humidity\", \"search_text\": \"RH:2 m\"},\n", + " {\"weather_type\": \"u-component\", \"search_text\": \"UGRD:10 m\"},\n", + " {\"weather_type\": \"v-component\", \"search_text\": \"VGRD:10 m\"},\n", + "]" ] }, { @@ -373,7 +356,7 @@ "metadata": {}, "source": [ "### Submit Request to Worker\n", - "Download forecast observations by submitting request to worker running in background. If more than one worker instance running in background, it process the request in parallel for each parameter. Workflow execution utilize below parameters while processing requests, this can be overwritten using the parameter argument.\n", + "We download forecast observations by submitting a request to the worker running in the background. If more than one worker instance is running in the background, the request is processed in parallel for each parameter. 
Workflow execution utilizes the parameters below while processing requests, this can be overwritten using the parameter argument.\n", "\n", "- fxx: [1, 25, 1] # start, stop, step\n", "- search_text: \"TMP:2 m\"\n", @@ -388,24 +371,48 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "'VibeWorkflowRun'(id='d7c0dc6a-339f-45b9-81d1-2fb93d2938f6', name='forecast_temperature', workflow='data_ingestion/weather/herbie_forecast', status='done')\n", - "'VibeWorkflowRun'(id='61d952d1-b068-4c2c-b522-a680efed450f', name='forecast_humidity', workflow='data_ingestion/weather/herbie_forecast', status='running')\n", - "'VibeWorkflowRun'(id='8c95f7ab-6d6b-40e8-a3bd-c12b854d0a7b', name='forecast_u-component', workflow='data_ingestion/weather/herbie_forecast', status='running')\n", - "'VibeWorkflowRun'(id='7490cd70-9731-4cac-ab36-051d3903776a', name='forecast_v-component', workflow='data_ingestion/weather/herbie_forecast', status='running')\n" - ] + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c00b48cb983f4c2184d411cd346f2bdb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "forecast_ = Forecast(\n",
-    "                workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
-    "                geometry=STATION_GEOMETRY,\n",
-    "                time_range=time_range,\n",
-    "                parameters=parameters,\n",
-    "                )\n",
-    "run_list = forecast_.submit_download_request()"
+    "forecast = Forecast(\n",
+    "    workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
+    "    geometry=STATION_GEOMETRY,\n",
+    "    time_range=time_range,\n",
+    "    parameters=parameters,\n",
+    ")\n",
+    "run_list = forecast.submit_download_request()"
    ]
   },
   {
@@ -413,12 +420,14 @@
    "metadata": {},
    "source": [
     "### Monitor download of Forecast observations\n",
-    "Check the download status and fetch the downloaded data from the cluster running in backend. The execution time of download depends on time_range. The downloaded data undergoes below changes.\n",
+    "Check the download status and fetch the downloaded data from FarmVibes.AI. The execution time of the download depends on the time range. \n",
+    "\n",
+    "The downloaded data undergoes the following changes:\n",
     "\n",
-    "1. concatenate the output of all requests submitted.\n",
+    "1. Concatenate the output of all submitted requests.\n",
     "2. Set index on date column.\n",
-    "3. Does interpolate to derive the missing data.\n",
-    "4. The data downloaded follows the utc timezone. It's required to transform the data to the timezone of historical observations. The historical observations used in this notebook follows pst timezone, hence the data offset by -8 hours."
+    "3. Interpolate to derive the missing data.\n",
+    "4. The data downloaded follows the UTC timezone. It's required to transform the data to the timezone of historical observations. The historical observations used in this notebook follow the PST timezone, hence the data is offset by -8 hours."
    ]
   },
   {
@@ -501,14 +510,14 @@
        "2020-05-30 17:00:00             -2.861307              1.178179  "
       ]
      },
-     "execution_count": 15,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# transform downloaded data from utc to pst timezone\n",
-    "forecast_dataset = forecast_.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
+    "forecast_dataset = forecast.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
     "forecast_dataset.to_csv(f\"{STATION_NAME}_forecast.csv\")\n",
     "forecast_dataset.head(2)"
    ]
@@ -517,109 +526,40 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Preprocess Forecast Observations\n",
-    "Below preprocessing performed on downloaded data before performing model training.\n",
-    "\n",
-    "- Temperature: The downloaded data has units \"kelvin\". It will be converted to Fahrenheit.\n",
-    "- wind_speed: Using the u-component & v-component values downloaded, the wind_speed values derived. The derived values multiplied by 2.23 to convert from m/sec to mph\n",
-    "- drop u-component & v-component"
+    "### Preprocess Forecast Observations"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 16,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# Temperature\n",
-    "# convert kelvin to celsius\n",
-    "forecast_dataset[\"temperature_forecast\"] = forecast_dataset[\"temperature_forecast\"]-273.15\n",
+    "We perform the following preprocessing on the downloaded data before training the model.\n",
     "\n",
-    "# convert celsius to Fahrenheit\n",
-    "forecast_dataset[\"temperature_forecast\"] = forecast_dataset[\"temperature_forecast\"].apply(lambda x: (x * 9/5) + 32)"
+    "- `temperature`: The downloaded data is in Kelvin. It will be converted to Fahrenheit.\n",
+    "- `wind_speed`: Using the u-component & v-component values downloaded, the `wind_speed` values are derived. The derived values are multiplied by 2.23 to convert from m/sec to mph.\n",
+    "- Drop u-component & v-component"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 17,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
temperature_forecasthumidity_forecastwind_speed_forecast
date
2020-05-30 16:00:0084.17363349.2999997.025768
2020-05-30 17:00:0082.14973154.5999986.900466
\n", - "
" - ], - "text/plain": [ - " temperature_forecast humidity_forecast \\\n", - "date \n", - "2020-05-30 16:00:00 84.173633 49.299999 \n", - "2020-05-30 17:00:00 82.149731 54.599998 \n", - "\n", - " wind_speed_forecast \n", - "date \n", - "2020-05-30 16:00:00 7.025768 \n", - "2020-05-30 17:00:00 6.900466 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# wind_speed\n", - "# multiplying with 2.23 to convert wind speed from m/sec to mph\n", - "forecast_dataset[\"wind_speed_forecast\"] = forecast_dataset.apply(lambda x: np.sqrt(np.square(x[\"u-component_forecast\"]) + \n", - " np.square(x[\"v-component_forecast\"]))*2.23, axis=1)\n", - "\n", - "forecast_dataset.drop(columns=[\"u-component_forecast\", \"v-component_forecast\"], inplace=True)\n", + "forecast_dataset = utils.convert_forecast_data(forecast_dataset)\n", "forecast_dataset.head(2)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also clean the input data with the following operations:\n", + "- Exclude input data outside the time_range of interest.\n", + "- Shift forecast data by number of hours\n", + "- Fill missing data with neighboring data points using pandas interpolate techniques." 
+ ] + }, { "cell_type": "code", "execution_count": 18, @@ -698,18 +638,21 @@ "2020-07-06 01:00:00 57.220984 3.85 10.642863 " ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "input_df = utils.clean_relevant_data(\n", - " actual_df=historical_dataset, \n", - " forecast_df=forecast_dataset, \n", - " out_variables=RELEVANT_FEATURES,\n", - " freq_hours=frequency_hour,\n", - " num_of_indices=number_of_hours)\n", + "input_df = utils.clean_relevant_data_using_hrrr(\n", + " actual_df=historical_dataset,\n", + " forecast_df=forecast_dataset,\n", + " out_variables=RELEVANT_FEATURES,\n", + " freq_hours=frequency_hour,\n", + " num_of_indices=number_of_hours,\n", + " start_date=start_date,\n", + " end_date=end_date,\n", + ")\n", "input_df.head(2)" ] }, @@ -717,12 +660,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Verifying the forecast observations are relevant or not relevant" + "### Verifying if the forecast observations are relevant or not relevant" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -731,7 +674,7 @@ "" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, @@ -747,7 +690,7 @@ } ], "source": [ - "plot_df = input_df[(input_df.index.month==7) & (input_df.index.year==2020)]\n", + "plot_df = input_df[(input_df.index.month == 7) & (input_df.index.year == 2020)]\n", "\n", "plt.figure(figsize=(20, 4))\n", "plt.plot(plot_df.index.values, plot_df[\"temperature_forecast\"].values, label=\"forecast\")\n", @@ -759,7 +702,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Based on the distribution of observation in above plot, the forecast observations are relevant. In this scenario continue with model training process using relevant dataset." + "Based on the distribution of observation in above plot, the forecast observations are relevant. 
In this scenario, we will continue with model training using the relevant dataset." ] }, { @@ -768,7 +711,7 @@ "source": [ "### Training\n", "\n", - "The script is configured to train the Micro Climate prediction model for 24 hours and the historical weather station data has points with a 60-minute frequency. Below inputs vary based on number of hours of prediction and frequency of weather station data points.\n", + "The script is configured to train the Micro Climate prediction model for 24 hours and the historical weather station data has points with a 60-minute frequency. Below inputs vary based on the number of hours of prediction and frequency of weather station data points.\n", "\n", "1. `chunk_size` - The value of the chunk size is based on the frequency of the weather station data points. For a frequency of 60 minutes, the minimum required data points are 528. If the data frequency is 15 minutes, the minimum number of data points required is 528*4 = 2112. These are the minimum number of data points need to be provided as input during the inference.\n", "2. `ts_lookahead` - The value used during the data preprocessing. It is the value used to consider weather data points ahead for a given time period while grouping the data.\n", @@ -791,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "tags": [] }, @@ -803,7 +746,8 @@ " root_path=ROOT_PATH,\n", " data_export_path=DATA_EXPORT_PATH,\n", " station_name=STATION_NAME,\n", - " relevant=True)\n", + " relevant=True,\n", + ")\n", "\n", "train_weather.train_model(input_df)" ] @@ -820,26 +764,27 @@ "metadata": {}, "source": [ "### Current\n", - "Predict weather for the next 24 hours. To predict weather for next 24 hours it is required to certain hours of historical forecast observations, the default size called chunk size of historical forecast observations is 528. 
Choosing start time of prediction is important, if historical observations used to train model has the start time of 12:00:00 then the historical observations used for prediction should start at the same time." + "To predict the weather for next 24 hours, we need certain hours of historical forecast observations beforehand. The default size (chunk size) of historical forecast observations is 528. Choosing a start time to predict is important, if historical observations used to train model have a starting time of 12:00:00, then the historical observations used for prediction should start at the same time." ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "weather_forecast = prediction.InferenceWeather(\n", - " root_path=ROOT_PATH,\n", - " data_export_path=DATA_EXPORT_PATH,\n", - " station_name=STATION_NAME,\n", - " predicts=OUT_FEATURES,\n", - " relevant=True)" + " root_path=ROOT_PATH,\n", + " data_export_path=DATA_EXPORT_PATH,\n", + " station_name=STATION_NAME,\n", + " predicts=OUT_FEATURES,\n", + " relevant=True,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -848,55 +793,69 @@ "p_end_date = datetime(year=2022, month=6, day=3, hour=0, minute=0, second=0)\n", "\n", "time_range = (p_start_date, p_end_date)\n", - "date_column=\"date\"\n", + "date_column = \"date\"\n", "\n", - "parameters = [{\n", - " \"weather_type\": \"temperature\",\n", - " \"search_text\": \"TMP:2 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"humidity\",\n", - " \"search_text\": \"RH:2 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"u-component\",\n", - " \"search_text\": \"UGRD:10 m\"\n", - " },\n", - " {\n", - " \"weather_type\": \"v-component\",\n", - " \"search_text\": \"VGRD:10 m\"\n", - " }]" + "parameters = [\n", + " {\"weather_type\": \"temperature\", \"search_text\": \"TMP:2 m\"},\n", + " {\"weather_type\": \"humidity\", 
\"search_text\": \"RH:2 m\"},\n", + " {\"weather_type\": \"u-component\", \"search_text\": \"UGRD:10 m\"},\n", + " {\"weather_type\": \"v-component\", \"search_text\": \"VGRD:10 m\"},\n", + "]" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "'VibeWorkflowRun'(id='ea662424-c9c5-4d1f-8d1c-ed907f0176ea', name='forecast_temperature', workflow='data_ingestion/weather/herbie_forecast', status='done')\n", - "'VibeWorkflowRun'(id='e93b52c2-9c20-4bf2-b647-a7dc04ff4947', name='forecast_humidity', workflow='data_ingestion/weather/herbie_forecast', status='done')\n", - "'VibeWorkflowRun'(id='5cbd7199-626c-43dc-aa73-17639c97bc30', name='forecast_u-component', workflow='data_ingestion/weather/herbie_forecast', status='done')\n", - "'VibeWorkflowRun'(id='e9373c58-5730-4b03-aee4-83015ab08848', name='forecast_v-component', workflow='data_ingestion/weather/herbie_forecast', status='done')\n" - ] + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "04e890c9992c4710acdbb661b49c1e56",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "forecast_ = Forecast(\n",
-    "                workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
-    "                geometry=STATION_GEOMETRY,\n",
-    "                time_range=time_range,\n",
-    "                parameters=parameters,\n",
-    "                )\n",
-    "run_list = forecast_.submit_download_request()"
+    "forecast = Forecast(\n",
+    "    workflow_name=HERBIE_DOWNLOAD_WORKFLOW,\n",
+    "    geometry=STATION_GEOMETRY,\n",
+    "    time_range=time_range,\n",
+    "    parameters=parameters,\n",
+    ")\n",
+    "run_list = forecast.submit_download_request()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -974,21 +933,21 @@
        "2022-03-17 17:00:00              4.563419              1.176411  "
       ]
      },
-     "execution_count": 23,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# transform downloaded data from utc to pst timezone\n",
-    "p_forecast_dataset = forecast_.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
+    "p_forecast_dataset = forecast.get_downloaded_data(run_list=run_list, offset_hours=-8)\n",
     "p_forecast_dataset.to_csv(f\"{STATION_NAME}_forecast.csv\")\n",
     "p_forecast_dataset.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -1047,14 +1006,14 @@
        "2022-03-18 14:00:00    66.300      16.175       50.075"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "predict_file_path = f\"./data/{STATION_NAME}/prediction.csv\"\n",
-    "p_historical_dataset = utils.get_csv_data(path=predict_file_path)\n",
+    "p_historical_dataset = utils.get_csv_data(path=predict_file_path, interpolate=False, fill_na=False)\n",
     "p_historical_dataset = p_historical_dataset[HISTORICAL_MODEL_TRAIN_FEATURES]\n",
     "\n",
     "p_historical_dataset.head(2)"
@@ -1062,35 +1021,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Temperature\n",
-    "# convert kelvin to celsius\n",
-    "p_forecast_dataset[\"temperature_forecast\"] = p_forecast_dataset[\"temperature_forecast\"]-273.15\n",
-    "\n",
-    "# convert celsius to Fahrenheit\n",
-    "p_forecast_dataset[\"temperature_forecast\"] = p_forecast_dataset[\"temperature_forecast\"].apply(lambda x: (x * 9/5) + 32)"
+    "p_forecast_dataset = utils.convert_forecast_data(p_forecast_dataset)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 26,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# wind_speed\n",
-    "# multiplying with 2.23 to convert wind speed from m/sec to mph\n",
-    "p_forecast_dataset[\"wind_speed_forecast\"] = p_forecast_dataset.apply(lambda x: np.sqrt(np.square(x[\"u-component_forecast\"]) + \n",
-    "                                    np.square(x[\"v-component_forecast\"]))*2.23, axis=1)\n",
-    "\n",
-    "p_forecast_dataset.drop(columns=[\"u-component_forecast\", \"v-component_forecast\"], inplace=True)"
+    "We clean the input data with the following operations:\n",
+    "- Exclude input data outside the time range of interest.\n",
+    "- Shift the forecast data by a given number of hours.\n",
+    "- Fill missing data with neighboring data points using pandas interpolate techniques."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -1166,19 +1116,21 @@
        "2022-03-18 14:00:00             45.456384      16.175            17.855009  "
       ]
      },
-     "execution_count": 27,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "input_df = utils.clean_relevant_data(\n",
-    "                    actual_df=p_historical_dataset.copy(),\n",
-    "                    forecast_df= p_forecast_dataset.copy(),\n",
-    "                    out_variables= RELEVANT_FEATURES,\n",
-    "                    freq_hours=frequency_hour,\n",
-    "                    num_of_indices=number_of_hours\n",
-    "                )\n",
+    "input_df = utils.clean_relevant_data_using_hrrr(\n",
+    "    actual_df=p_historical_dataset.copy(),\n",
+    "    forecast_df=p_forecast_dataset.copy(),\n",
+    "    out_variables=RELEVANT_FEATURES,\n",
+    "    freq_hours=frequency_hour,\n",
+    "    num_of_indices=number_of_hours,\n",
+    "    start_date=start_date,\n",
+    "    end_date=end_date,\n",
+    ")\n",
     "\n",
     "base_data_df = input_df[RELEVANT_FEATURES]\n",
     "base_data_df.head(2)"
@@ -1186,7 +1138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1197,7 +1149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -1239,7 +1191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -1315,45 +1267,45 @@
        "2022-03-16 17:00:00             44.783197      14.325            10.509131  "
       ]
      },
-     "execution_count": 30,
+     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "predict_file_path = f\"./data/{STATION_NAME}/training.csv\"\n",
-    "p_historical_dataset = utils.get_csv_data(path=predict_file_path)\n",
+    "p_historical_dataset = utils.get_csv_data(path=predict_file_path, interpolate=False, fill_na=False)\n",
     "p_historical_dataset = p_historical_dataset[HISTORICAL_MODEL_TRAIN_FEATURES]\n",
     "p_historical_dataset.head(5)\n",
     "\n",
     "input_df = utils.clean_relevant_data(\n",
-    "                    p_historical_dataset.copy(), \n",
-    "                    p_forecast_dataset.copy(), \n",
-    "                    RELEVANT_FEATURES,\n",
-    "                    freq_hours=frequency_hour,\n",
-    "                    num_of_indices=number_of_hours)\n",
+    "    p_historical_dataset.copy(),\n",
+    "    p_forecast_dataset.copy(),\n",
+    "    RELEVANT_FEATURES,\n",
+    "    freq_hours=frequency_hour,\n",
+    "    num_of_indices=number_of_hours,\n",
+    ")\n",
     "base_data_df = input_df[RELEVANT_FEATURES]\n",
     "base_data_df.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
     "predict_start_datetime = datetime(year=2022, month=4, day=30, hour=13, minute=0, second=0)\n",
     "predict_end_datetime = datetime(year=2022, month=5, day=21, hour=13, minute=0, second=0)\n",
     "\n",
-    "df_out = weather_forecast.inference_historical(base_data_df.copy(),\n",
-    "            start_datetime=predict_start_datetime,\n",
-    "            end_datetime=predict_end_datetime\n",
-    "            )"
+    "df_out = weather_forecast.inference_historical(\n",
+    "    base_data_df.copy(), start_datetime=predict_start_datetime, end_datetime=predict_end_datetime\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -1378,20 +1330,22 @@
     }
    ],
    "source": [
-    "base_data_df = base_data_df[(base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)]\n",
+    "base_data_df = base_data_df[\n",
+    "    (base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)\n",
+    "]\n",
     "\n",
     "for predict in OUT_FEATURES:\n",
     "    plt.figure(figsize=(18, 6))\n",
-    "    plt.plot(df_out[\"date\"].values, utils.smooth(df_out[predict].values, 2), label=\"Predict\")\n",
+    "    plt.plot(df_out[\"date\"].values, utils.smooth(df_out[predict].values, 2), label=\"Prediction\")\n",
     "    plt.plot(base_data_df.index.values, base_data_df[predict].values, label=\"Ground Truth\")\n",
-    "    # plt.plot(base_data_df.index.values, base_data_df[predict+\"_forecast\"].values, label=\"Forecast\")\n",
+    "\n",
     "    plt.title(f\"24 Models {predict} Ground Truth Vs Predict\")\n",
     "    plt.legend()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -1411,31 +1365,28 @@
     }
    ],
    "source": [
-    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
-    "import math\n",
-    "\n",
-    "def calculate_KPI(y, yhat):\n",
-    "    print(\"RMSE: {}\".format(round(mean_squared_error(y,yhat,squared=False),2)))\n",
-    "    print(\"MAE: {}\".format(round(mean_absolute_error(y,yhat),2)))\n",
-    "    print(\"MAE%: {}%\".format(round(100*sum(abs(y-yhat))/sum(y),2)))\n",
-    "\n",
     "print(\"temperature\")\n",
-    "calculate_KPI(utils.smooth(df_out[\"temperature\"].values, 1),base_data_df[\"temperature\"].values)\n",
+    "utils.calculate_KPI(\n",
+    "    utils.smooth(list(df_out[\"temperature\"].values), 1),\n",
+    "    np.array(base_data_df[\"temperature\"].values),\n",
+    ")\n",
     "\n",
     "print(\"\\n\", \"wind_speed\")\n",
-    "calculate_KPI(utils.smooth(df_out[\"wind_speed\"].values, 1),base_data_df[\"wind_speed\"].values)"
+    "utils.calculate_KPI(\n",
+    "    utils.smooth(list(df_out[\"wind_speed\"].values), 1), np.array(base_data_df[\"wind_speed\"].values)\n",
+    ")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Training model using not relevant dataset or without forecast observations"
+    "### Training model using non-relevant dataset or without forecast observations"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1445,7 +1396,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1455,7 +1406,8 @@
     "    root_path=ROOT_PATH,\n",
     "    data_export_path=DATA_EXPORT_PATH,\n",
     "    station_name=STATION_NAME,\n",
-    "    relevant=False)\n",
+    "    relevant=False,\n",
+    ")\n",
     "\n",
     "train_weather.train_model(historical_df, start=0, epochs=1)"
    ]
@@ -1470,15 +1422,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
     "weather_forecast = prediction.InferenceWeather(\n",
-    "                        root_path=ROOT_PATH,\n",
-    "                        data_export_path=DATA_EXPORT_PATH,\n",
-    "                        station_name=STATION_NAME,\n",
-    "                        predicts=OUT_FEATURES)"
+    "    root_path=ROOT_PATH,\n",
+    "    data_export_path=DATA_EXPORT_PATH,\n",
+    "    station_name=STATION_NAME,\n",
+    "    predicts=OUT_FEATURES,\n",
+    ")"
    ]
   },
   {
@@ -1491,7 +1444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1501,7 +1454,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1511,14 +1464,12 @@
     "\n",
     "df_output_merge = pd.DataFrame(columns=base_data_df.columns)\n",
     "\n",
-    "df_out = weather_forecast.inference(base_data_df,\n",
-    "            start_datetime=predict_start_datetime\n",
-    "            )"
+    "df_out = weather_forecast.inference(base_data_df, start_datetime=predict_start_datetime)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
@@ -1560,7 +1511,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1571,15 +1522,14 @@
     "predict_start_datetime = datetime(year=2022, month=4, day=30, hour=13, minute=0, second=0)\n",
     "predict_end_datetime = datetime(year=2022, month=5, day=21, hour=13, minute=0, second=0)\n",
     "\n",
-    "df_out = weather_forecast.inference_historical(base_data_df,\n",
-    "            start_datetime=predict_start_datetime,\n",
-    "            end_datetime=predict_end_datetime\n",
-    "            )"
+    "df_out = weather_forecast.inference_historical(\n",
+    "    base_data_df, start_datetime=predict_start_datetime, end_datetime=predict_end_datetime\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [
     {
@@ -1604,12 +1554,14 @@
     }
    ],
    "source": [
-    "base_data_df = base_data_df[(base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)]\n",
+    "base_data_df = base_data_df[\n",
+    "    (base_data_df.index >= predict_start_datetime) & (base_data_df.index <= predict_end_datetime)\n",
+    "]\n",
     "for predict in OUT_FEATURES:\n",
     "    plt.figure(figsize=(20, 5))\n",
     "    plt.plot(df_out[\"date\"].values, df_out[predict].values)\n",
     "    plt.plot(base_data_df.index.values, base_data_df[predict].values)\n",
-    "    plt.title(f\"24 Models {predict} Ground Truth Vs Predict\")\n",
+    "    plt.title(f\"24 Models {predict} Ground Truth Vs Prediction\")\n",
     "    plt.legend([\"Predict\", \"Ground Truth\"])"
    ]
   }
@@ -1632,7 +1584,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.8.18"
   },
   "name": "Micro climate prediction",
   "running_time": "",
diff --git a/notebooks/deepmc/notebook_lib/forecast.py b/notebooks/deepmc/notebook_lib/forecast.py
index 645b8e8d..993a6617 100644
--- a/notebooks/deepmc/notebook_lib/forecast.py
+++ b/notebooks/deepmc/notebook_lib/forecast.py
@@ -1,4 +1,3 @@
-import time
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Tuple, cast
 
@@ -8,7 +7,7 @@
 from shapely.geometry import Point
 
 from vibe_core.client import FarmvibesAiClient, get_default_vibe_client
-from vibe_core.datamodel import RunConfig, RunConfigUser, RunDetails, SpatioTemporalJson
+from vibe_core.datamodel import RunConfig, RunConfigUser, SpatioTemporalJson
 
 
 class Forecast:
@@ -31,7 +30,8 @@ def submit_download_request(self):
         """
         Submit request to worker to download forecast data
         """
-        run_list = []
+        run_metadata_list = []
+        runs = []
         for parameter in self.parameters:
             run_name = f"forecast_{parameter['weather_type']}"
             run = self.client.run(
@@ -42,57 +42,40 @@ def submit_download_request(self):
                 parameters=parameter,
             )
 
-            try:
-                run.block_until_complete(5)
-            except RuntimeError:
-                print(run)
-
-            run_list.append(
+            run_metadata_list.append(
                 {
                     "id": run.id,
                     "weather_type": parameter["weather_type"],
                 }
             )
+            runs.append(run)
+
+        self.client.monitor(runs, 5)
 
-        return run_list
+        return run_metadata_list
 
     def get_run_status(self, run_list: List[Dict[str, str]]):
         clear_output(wait=True)
-        all_done = True
-        out_ = []
+        out = []
         for run_item in run_list:
             o = self.client.describe_run(run_item["id"])
             print(f"Execution status for {run_item['weather_type']}: {o.details.status}")
 
             if o.details.status == "done":
-                out_.append(o)
-            elif o.details.status == "failed":
-                print(o.details)
+                out.append(o)
             else:
-                all_done = False
-                cnt_complete = 0
-                for key, value in o.task_details.items():
-                    value = cast(RunDetails, value)
-                    assert value.subtasks is not None, "Subtasks don't exist"
-                    for subtask in value.subtasks:
-                        if subtask.status == "done":
-                            cnt_complete += 1
-                    print(
-                        "\t",
-                        f"Subtask {key}",
-                        cnt_complete,
-                        "/",
-                        len(value.subtasks),
-                    )
-                    cnt_complete = 0
-        return all_done, out_
+                raise Exception(
+                    f"Execution status for {run_item['weather_type']}: {o.details.status}"
+                )
+
+        return out
 
     def get_all_assets(self, details: RunConfigUser):
         asset_files = []
         output = details.output["weather_forecast"]
         record: Dict[str, Any]
         for record in cast(List[Dict[str, Any]], output):
-            for _, value in record["assets"].items():
+            for value in record["assets"].values():
                 asset_files.append(value["href"])
         df_assets = [pd.read_csv(f, index_col=False) for f in asset_files]
         df_out = pd.concat(df_assets)
@@ -104,21 +87,15 @@ def get_downloaded_data(self, run_list: List[Dict[str, str]], offset_hours: int
         check the download status. If status is done, fetch the downloaded data
         """
         forecast_dataset = pd.DataFrame()
-        status = False
-        out_ = []
-        while status is False:
-            status, out_ = self.get_run_status(run_list)
-            time.sleep(10)
-
-        if status:
-            for detail in out_:
-                df = self.get_all_assets(detail)
+        out = self.get_run_status(run_list)
+        for detail in out:
+            df = self.get_all_assets(detail)
 
-                # Offset from UTC to specified timezone
-                df.index = df.index + pd.offsets.Hour(offset_hours)
+            # Offset from UTC to specified timezone
+            df.index = df.index + pd.offsets.Hour(offset_hours)
 
-                if not df.empty:
-                    forecast_dataset = pd.concat([forecast_dataset, df], axis=1)
+            if not df.empty:
+                forecast_dataset = pd.concat([forecast_dataset, df], axis=1)
 
         return forecast_dataset
 
diff --git a/notebooks/deepmc/notebook_lib/modules.py b/notebooks/deepmc/notebook_lib/modules.py
index 5fbfe012..9be52ab4 100644
--- a/notebooks/deepmc/notebook_lib/modules.py
+++ b/notebooks/deepmc/notebook_lib/modules.py
@@ -59,14 +59,14 @@ def training_step(self, train_batch: Tensor, _):
         x, y = train_batch[:6], train_batch[6]
         y_hat = self.deepmc(x)
         loss = self.loss(y_hat, y)
-        self.log("train_loss/total", loss)
+        self.log("train_loss/total", loss, on_epoch=True, prog_bar=True, logger=True, on_step=True)
         return loss
 
     def validation_step(self, validation_batch: Tensor, _):
         x, y = validation_batch[:6], validation_batch[6]
         y_hat = self.deepmc(x)
         loss = self.loss(y_hat, y)
-        self.log("val_loss/total", loss, on_epoch=True)
+        self.log("val_loss/total", loss, on_epoch=True, prog_bar=True, logger=True, on_step=True)
         return loss
 
 
diff --git a/notebooks/deepmc/notebook_lib/post_models.py b/notebooks/deepmc/notebook_lib/post_models.py
deleted file mode 100644
index 224be6fd..00000000
--- a/notebooks/deepmc/notebook_lib/post_models.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from keras.layers import BatchNormalization, Dense, Input
-from keras.models import Sequential
-from keras.utils.vis_utils import plot_model
-
-
-def simple_mixture_model(inshape: int):
-    model = Sequential()
-    model.add(Input(shape=(inshape,)))
-
-    model.add(Dense(inshape * 2, activation="relu"))
-    model.add(BatchNormalization())
-    model.add(Dense(inshape * 4, activation="relu"))
-    model.add(BatchNormalization())
-    model.add(Dense(inshape))
-
-    model.compile(loss="mae", optimizer="adam")
-    return model
-
-
-def fit_model(model, train_X, train_y, test_X, test_y, batch_size: int):
-    batch_size = batch_size
-    validation_data = (test_X, test_y)
-
-    # fit network
-    history = model.fit(
-        train_X,
-        train_y,
-        epochs=20,
-        batch_size=batch_size,
-        validation_data=validation_data,
-        verbose=1,
-    )
-
-    return model, history
diff --git a/notebooks/deepmc/notebook_lib/train.py b/notebooks/deepmc/notebook_lib/train.py
index 6a6f2242..4b9fa8b7 100644
--- a/notebooks/deepmc/notebook_lib/train.py
+++ b/notebooks/deepmc/notebook_lib/train.py
@@ -14,8 +14,8 @@
 from torch import Tensor
 from torch.utils.data import DataLoader, TensorDataset
 
-from . import utils
-from .preprocess import Preprocess
+from vibe_notebook.deepmc import utils
+from vibe_notebook.deepmc.preprocess import Preprocess
 
 MODEL_SUFFIX = "deepmc."
 
@@ -35,7 +35,7 @@ def __init__(
         wavelet: str = "bior3.5",
         mode: str = "periodic",
         level: int = 5,
-        batch_size: int = 256,
+        batch_size: int = 24,
         relevant: bool = False,
     ):
         if relevant:
@@ -67,6 +67,7 @@ def train_model(
         start: int = 0,
         end: int = -1,
         epochs: int = 20,
+        reset_preprocess: bool = False,
     ):
         end = self.total_models if end == -1 else end
 
@@ -80,12 +81,22 @@ def train_model(
             input_order_df[out_feature] = out_feature_df
 
             # data preprocessing
-            (train_scaler, output_scaler, train_df, test_df,) = utils.get_split_scaled_data(
+            (
+                train_scaler,
+                output_scaler,
+                train_df,
+                test_df,
+            ) = utils.get_split_scaled_data(
                 data=input_order_df, out_feature=out_feature, split_ratio=0.92
             )
+            if reset_preprocess and os.path.exists(
+                self.data_export_path % (out_feature, self.relevant_text)
+            ):
+                os.remove(self.data_export_path % (out_feature, self.relevant_text))
 
             if os.path.exists(self.data_export_path % (out_feature, self.relevant_text)):
-                with open(self.data_export_path % (out_feature, self.relevant_text), "rb") as f:
+                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
+                with open(exp_path % (out_feature, self.relevant_text), "rb") as f:
                     (
                         train_X,
                         train_y,
@@ -93,6 +104,10 @@ def train_model(
                         test_y,
                         train_scaler,
                         output_scaler,
+                        train_dates_X,
+                        train_dates_y,
+                        test_dates_X,
+                        test_dates_y,
                     ) = pickle.load(f)
 
                 self.preprocess = Preprocess(
@@ -128,6 +143,10 @@ def train_model(
                     train_y,
                     test_X,
                     test_y,
+                    train_dates_X,
+                    train_dates_y,
+                    test_dates_X,
+                    test_dates_y,
                 ) = self.preprocess.wavelet_transform_train(train_df, test_df, out_feature)
 
                 with open(self.data_export_path % (out_feature, self.relevant_text), "wb") as f:
@@ -136,6 +155,25 @@ def train_model(
                         f,
                     )
 
+                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
+
+                with open(exp_path % (out_feature, self.relevant_text), "wb") as f:
+                    pickle.dump(
+                        [
+                            train_X,
+                            train_y,
+                            test_X,
+                            test_y,
+                            train_scaler,
+                            output_scaler,
+                            train_dates_X,
+                            train_dates_y,
+                            test_dates_X,
+                            test_dates_y,
+                        ],
+                        f,
+                    )
+
             self.train_models(
                 train_X=train_X,  # type: ignore
                 train_y=train_y,  # type: ignore
@@ -145,6 +183,8 @@ def train_model(
                 out_feature=out_feature,
                 start=start,
                 end=end,
+                train_dates_y=train_dates_y,  # type: ignore
+                test_dates_y=test_dates_y,  # type: ignore
             )
 
     def train_models(
@@ -157,6 +197,8 @@ def train_models(
         out_feature: str,
         start: int,
         end: int,
+        train_dates_y: List[str],
+        test_dates_y: List[str],
     ):
         first_channels = train_X[0].shape[2]
         rest_channels = train_X[1].shape[2]
@@ -209,7 +251,6 @@ def train_models(
                         dirpath=model_path,
                     ),
                 ],
-                num_processes=1,
             )
 
             t_obj.fit(m, train_loader, val_loader)
@@ -225,6 +266,8 @@ def train_models(
                 out_feature=out_feature,
                 model_index=i,
                 epochs=epochs,
+                train_dates_y=train_dates_y,
+                test_dates_y=test_dates_y,
             )
 
     def export_to_onnx(
@@ -249,19 +292,24 @@ def export_to_onnx(
         )
 
     def get_dataloader(
-        self, gt: NDArray[Any], target: NDArray[Any], o_feature: str
+        self,
+        gt: NDArray[Any],
+        target: NDArray[Any],
+        o_feature: str,
+        dates_mapped: NDArray[Any],
     ) -> Tuple[DataLoader[Any], List[Tensor]]:
-        o_x = self.preprocess.dl_preprocess_data(pd.DataFrame(gt), o_feature)[0][:, :, 0].astype(
-            np.float32
-        )
+        dates_mapped = pd.to_datetime(dates_mapped, format="%Y-%m-%d %H:%M:%S").values
+        df = pd.DataFrame(list(zip(gt, dates_mapped)), columns=["data", "date"])
+        df.set_index("date", inplace=True)
+        o_x = self.preprocess.dl_preprocess_data(df, o_feature)[0][:, :, 0].astype(np.float32)
 
-        o_y = self.preprocess.dl_preprocess_data(pd.DataFrame(target), o_feature)[0][
-            :, :, 0
-        ].astype(np.float32)
+        df = pd.DataFrame(list(zip(target, dates_mapped)), columns=["data", "date"])
+        df.set_index("date", inplace=True)
+        o_y = self.preprocess.dl_preprocess_data(df, o_feature)[0][:, :, 0].astype(np.float32)
 
         o_inputs = [torch.from_numpy(x.astype(np.float32)) for x in (o_x, o_y)]
         o_dataset = TensorDataset(*o_inputs)
-        o_loader = DataLoader(o_dataset, batch_size=self.batch_size, shuffle=True)
+        o_loader = DataLoader(o_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True)
         return o_loader, o_inputs
 
     def post_model(
@@ -274,6 +322,8 @@ def post_model(
         out_feature: str,
         model_index: int,
         epochs: int,
+        train_dates_y: List[str],
+        test_dates_y: List[str],
     ):
         m.eval()
 
@@ -288,11 +338,17 @@ def xf(a: List[NDArray[Any]]) -> List[Tensor]:
             os.mkdir(post_model_path)
 
         train_dataloader, _ = self.get_dataloader(
-            gt=train_y[:, model_index, 0], target=train_yhat, o_feature=out_feature  # type: ignore
+            gt=train_y[:, model_index, 0],  # type: ignore
+            target=train_yhat,
+            o_feature=out_feature,
+            dates_mapped=train_dates_y[:, model_index],  # type: ignore
         )
 
-        val_dataloader, val_inputs = self.get_dataloader(
-            gt=test_y[:, model_index, 0], target=test_yhat, o_feature=out_feature  # type: ignore
+        val_dataloader, _ = self.get_dataloader(
+            gt=test_y[:, model_index, 0],  # type: ignore
+            target=test_yhat,
+            o_feature=out_feature,
+            dates_mapped=test_dates_y[:, model_index],  # type: ignore
         )
 
         p_m = DeepMCPostTrain(first_in_features=self.total_models)
@@ -308,9 +364,113 @@ def xf(a: List[NDArray[Any]]) -> List[Tensor]:
                     dirpath=post_model_path,
                 ),
             ],
-            num_processes=1,
         )
 
         t_obj.fit(p_m, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
 
         self.export_to_onnx(file_path=post_model_path, model=p_m.deepmc, inputs=torch.rand((1, 24)))
+
+    def preprocess_data(
+        self,
+        input_df: pd.DataFrame,
+        out_path: str,
+        start: int = 0,
+        end: int = -1,
+        epochs: int = 20,
+        reset_preprocess: bool = False,
+    ):
+        end = self.total_models if end == -1 else end
+
+        for out_feature in self.out_features:
+            if not os.path.exists(self.path_to_station % out_feature):
+                os.makedirs(self.path_to_station % out_feature, exist_ok=True)
+
+            input_order_df = input_df[self.train_features].copy()
+            out_feature_df = input_order_df[out_feature]
+            input_order_df.drop(columns=[out_feature], inplace=True)
+            input_order_df[out_feature] = out_feature_df
+
+            # data preprocessing
+            (
+                train_scaler,
+                output_scaler,
+                train_df,
+                test_df,
+            ) = utils.get_split_scaled_data(
+                data=input_order_df, out_feature=out_feature, split_ratio=0.92
+            )
+            if reset_preprocess and os.path.exists(
+                self.data_export_path % (out_feature, self.relevant_text)
+            ):
+                os.remove(self.data_export_path % (out_feature, self.relevant_text))
+
+            if os.path.exists(self.data_export_path % (out_feature, self.relevant_text)):
+                with open(self.data_export_path % (out_feature, self.relevant_text), "rb") as f:
+                    (
+                        train_X,
+                        train_y,
+                        test_X,
+                        test_y,
+                        train_scaler,
+                        output_scaler,
+                    ) = pickle.load(f)
+
+                self.preprocess = Preprocess(
+                    train_scaler=train_scaler,
+                    output_scaler=output_scaler,
+                    is_training=True,
+                    is_validation=self.is_validation,
+                    ts_lookahead=self.ts_lookahead,
+                    ts_lookback=self.ts_lookback,
+                    chunk_size=self.chunk_size,
+                    wavelet=self.wavelet,
+                    mode=self.mode,
+                    level=self.level,
+                    relevant=self.relevant,
+                )
+            else:
+                self.preprocess = Preprocess(
+                    train_scaler=train_scaler,
+                    output_scaler=output_scaler,
+                    is_training=True,
+                    is_validation=self.is_validation,
+                    ts_lookahead=self.ts_lookahead,
+                    ts_lookback=self.ts_lookback,
+                    chunk_size=self.chunk_size,
+                    wavelet=self.wavelet,
+                    mode=self.mode,
+                    level=self.level,
+                    relevant=self.relevant,
+                )
+
+                (
+                    train_X,
+                    train_y,
+                    test_X,
+                    test_y,
+                    train_dates,
+                    test_dates,
+                ) = self.preprocess.wavelet_transform_train(train_df, test_df, out_feature)
+
+                with open(self.data_export_path % (out_feature, self.relevant_text), "wb") as f:
+                    pickle.dump(
+                        [train_X, train_y, test_X, test_y, train_scaler, output_scaler],
+                        f,
+                    )
+
+                exp_path = self.data_export_path.replace("train_data.pkl", "train_data_dates.pkl")
+
+                with open(exp_path % (out_feature, self.relevant_text), "wb") as f:
+                    pickle.dump(
+                        [
+                            train_X,
+                            train_y,
+                            test_X,
+                            test_y,
+                            train_scaler,
+                            output_scaler,
+                            train_dates,
+                            test_dates,
+                        ],
+                        f,
+                    )
diff --git a/notebooks/deepmc/notebook_lib/transformer_models_ts.py b/notebooks/deepmc/notebook_lib/transformer_models_ts.py
deleted file mode 100644
index ba55aaca..00000000
--- a/notebooks/deepmc/notebook_lib/transformer_models_ts.py
+++ /dev/null
@@ -1,367 +0,0 @@
-import numpy as np
-import tensorflow as tf
-
-
-def get_angles(pos, i, d_model):
-    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
-    return pos * angle_rates
-
-
-def positional_encoding(position, d_model):
-    angle_rads = get_angles(
-        np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model
-    )
-
-    # apply sin to even indices in the array; 2i
-    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
-
-    # apply cos to odd indices in the array; 2i+1
-    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
-
-    pos_encoding = angle_rads[np.newaxis, ...]
-
-    return tf.cast(pos_encoding, dtype=tf.float32)
-
-
-# create mask for padding, 0 --> 1 (mask)
-def create_padding_mask(seq):
-    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
-
-    # add extra dimensions to add the padding
-    # to the attention logits.
-    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)
-
-
-def create_look_ahead_mask(size):
-    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
-    return mask  # (seq_len, seq_len)
-
-
-def scaled_dot_product_attention(q, k, v, mask):
-    """Calculate the attention weights.
-    q, k, v must have matching leading dimensions.
-    k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
-    The mask has different shapes depending on its type(padding or look ahead)
-    but it must be broadcastable for addition.
-
-    Args:
-    q: query shape == (..., seq_len_q, depth)
-    k: key shape == (..., seq_len_k, depth)
-    v: value shape == (..., seq_len_v, depth_v)
-    mask: Float tensor with shape broadcastable
-          to (..., seq_len_q, seq_len_k). Defaults to None.
-
-    Returns:
-    output, attention_weights
-    """
-
-    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
-
-    # scale matmul_qk
-    dk = tf.cast(tf.shape(k)[-1], tf.float32)
-    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
-
-    # add the mask to the scaled tensor.
-    if mask is not None:
-        scaled_attention_logits += mask * -1e9
-
-    # softmax is normalized on the last axis (seq_len_k) so that the scores
-    # add up to 1.
-    attention_weights = tf.nn.softmax(
-        scaled_attention_logits, axis=-1
-    )  # (..., seq_len_q, seq_len_k)
-
-    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
-
-    return output, attention_weights
-
-
-def print_out(q, k, v):
-    temp_out, temp_attn = scaled_dot_product_attention(q, k, v, None)
-    print("Attention weights are:")
-    print(temp_attn)
-    print("Output is:")
-    print(temp_out)
-
-
-"""
-    - Q (query), K (key) and V (value) are split into multiple heads (num_heads)
-    - each tuple (q, k, v) are fed to scaled_dot_product_attention
-    - all attention outputs are concatenated
-"""
-
-
-class MultiHeadAttention(tf.keras.layers.Layer):
-    def __init__(self, d_model, num_heads):
-        super(MultiHeadAttention, self).__init__()
-        self.num_heads = num_heads
-        self.d_model = d_model
-
-        assert d_model % self.num_heads == 0
-
-        self.depth = d_model // self.num_heads
-
-        self.wq = tf.keras.layers.Dense(d_model)
-        self.wk = tf.keras.layers.Dense(d_model)
-        self.wv = tf.keras.layers.Dense(d_model)
-
-        self.dense = tf.keras.layers.Dense(d_model)
-
-    def split_heads(self, x, batch_size):
-        """Split the last dimension into (num_heads, depth).
-        Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)
-        """
-        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
-        return tf.transpose(x, perm=[0, 2, 1, 3])
-
-    def call(self, v, k, q, mask):
-        batch_size = tf.shape(q)[0]
-
-        q = self.wq(q)  # (batch_size, seq_len, d_model)
-        k = self.wk(k)  # (batch_size, seq_len, d_model)
-        v = self.wv(v)  # (batch_size, seq_len, d_model)
-
-        q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
-        k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
-        v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)
-
-        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
-
-        scaled_attention = tf.transpose(
-            scaled_attention, perm=[0, 2, 1, 3]
-        )  # (batch_size, seq_len_q, num_heads, depth)
-
-        concat_attention = tf.reshape(
-            scaled_attention, (batch_size, -1, self.d_model)
-        )  # (batch_size, seq_len_q, d_model)
-
-        output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
-
-        return output, attention_weights
-
-
-def point_wise_feed_forward_network(d_model, dff):
-    return tf.keras.Sequential(
-        [
-            tf.keras.layers.Dense(dff, activation="relu"),  # (batch_size, seq_len, dff)
-            tf.keras.layers.Dense(d_model),  # (batch_size, seq_len, d_model)
-        ]
-    )
-
-
-class EncoderLayer(tf.keras.layers.Layer):
-    def __init__(self, d_model, num_heads, dff, rate=0.1):
-        super(EncoderLayer, self).__init__()
-
-        self.mha = MultiHeadAttention(d_model, num_heads)
-        self.ffn = point_wise_feed_forward_network(d_model, dff)
-
-        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-
-        self.dropout1 = tf.keras.layers.Dropout(rate)
-        self.dropout2 = tf.keras.layers.Dropout(rate)
-
-    def call(self, x, training, mask):
-
-        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
-        attn_output = self.dropout1(attn_output, training=training)
-        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
-
-        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
-        ffn_output = self.dropout2(ffn_output, training=training)
-        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
-
-        return out2
-
-
-class DecoderLayer(tf.keras.layers.Layer):
-    def __init__(self, d_model, num_heads, dff, rate=0.1):
-        super(DecoderLayer, self).__init__()
-
-        self.mha1 = MultiHeadAttention(d_model, num_heads)
-        self.mha2 = MultiHeadAttention(d_model, num_heads)
-
-        self.ffn = point_wise_feed_forward_network(d_model, dff)
-
-        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-
-        self.dropout1 = tf.keras.layers.Dropout(rate)
-        self.dropout2 = tf.keras.layers.Dropout(rate)
-        self.dropout3 = tf.keras.layers.Dropout(rate)
-
-    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
-        # enc_output.shape == (batch_size, input_seq_len, d_model)
-
-        attn1, attn_weights_block1 = self.mha1(
-            x, x, x, look_ahead_mask
-        )  # (batch_size, target_seq_len, d_model)
-        attn1 = self.dropout1(attn1, training=training)
-        out1 = self.layernorm1(attn1 + x)
-
-        attn2, attn_weights_block2 = self.mha2(
-            enc_output, enc_output, out1, padding_mask
-        )  # (batch_size, target_seq_len, d_model)
-        attn2 = self.dropout2(attn2, training=training)
-        out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)
-
-        ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
-        ffn_output = self.dropout3(ffn_output, training=training)
-        out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
-
-        return out3, attn_weights_block1, attn_weights_block2
-
-
-class Encoder(tf.keras.layers.Layer):
-    def __init__(self, num_layers, d_model, num_heads, dff, maximum_position_encoding, rate=0.1):
-        super(Encoder, self).__init__()
-
-        self.d_model = d_model
-        self.num_layers = num_layers
-
-        self.embedding = tf.keras.layers.Dense(d_model, activation="relu")
-        self.pos_encoding = positional_encoding(maximum_position_encoding, self.d_model)
-
-        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
-
-        self.dropout = tf.keras.layers.Dropout(rate)
-
-    def call(self, x, training, mask):
-
-        seq_len = tf.shape(x)[1]
-
-        # print("Encoder:", x.shape)
-        # adding embedding and position encoding.
-        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
-        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
-        x += self.pos_encoding[:, :seq_len, :]
-
-        x = self.dropout(x, training=training)
-
-        for i in range(self.num_layers):
-            x = self.enc_layers[i](x, training, mask)
-
-        return x  # (batch_size, input_seq_len, d_model)
-
-
-class Decoder(tf.keras.layers.Layer):
-    def __init__(self, num_layers, d_model, num_heads, dff, maximum_position_encoding, rate=0.1):
-        super(Decoder, self).__init__()
-
-        self.d_model = d_model
-        self.num_layers = num_layers
-
-        self.embedding = tf.keras.layers.Dense(d_model, activation="relu")
-        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
-
-        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
-        self.dropout = tf.keras.layers.Dropout(rate)
-
-    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
-
-        seq_len = tf.shape(x)[1]
-        attention_weights = {}
-
-        x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
-        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
-        x += self.pos_encoding[:, :seq_len, :]
-
-        x = self.dropout(x, training=training)
-
-        for i in range(self.num_layers):
-            x, block1, block2 = self.dec_layers[i](
-                x, enc_output, training, look_ahead_mask, padding_mask
-            )
-            attention_weights["decoder_layer{}_block1".format(i + 1)] = block1
-            attention_weights["decoder_layer{}_block2".format(i + 1)] = block2
-
-        return x, attention_weights
-
-
-class Transformer(tf.keras.Model):
-    def __init__(
-        self, num_layers, d_model, num_heads, dff, target_vocab_size, pe_input, pe_target, rate=0.1
-    ):
-        super(Transformer, self).__init__()
-
-        self.encoder = Encoder(num_layers, d_model, num_heads, dff, pe_input, rate)
-
-        self.decoder = Decoder(num_layers, d_model, num_heads, dff, pe_target, rate)
-
-        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
-
-    def call(self, inp, tar, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):
-
-        enc_output = self.encoder(
-            inp, training, enc_padding_mask
-        )  # (batch_size, inp_seq_len, d_model)
-
-        # dec_output.shape == (batch_size, tar_seq_len, d_model)
-        dec_output, attention_weights = self.decoder(
-            tar, enc_output, training, look_ahead_mask, dec_padding_mask
-        )
-
-        final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
-
-        return final_output, attention_weights
-
-
-class GLU(tf.keras.layers.Layer):
-    def __init__(self, input_channel, output_channel):
-        super(GLU, self).__init__()
-        self.linear_left = tf.keras.layers.Dense(output_channel)
-        self.linear_right = tf.keras.layers.Dense(output_channel)
-
-    def call(self, x):
-        return tf.math.multiply(
-            self.linear_left(x), tf.keras.activations.sigmoid(self.linear_right(x))
-        )
-
-
-class FFT(tf.keras.layers.Layer):
-    def __init__(self, time_step, order, output_channel):
-        super(FFT, self).__init__()
-        self.time_step = time_step
-        self.order = order
-        self.output_channel = output_channel
-        self.GLUs = []  # nn.ModuleList()
-        for i in range(3):
-            if i == 0:
-                self.GLUs.append(
-                    GLU(self.time_step * self.order, self.time_step * self.output_channel)
-                )
-                self.GLUs.append(
-                    GLU(self.time_step * self.order, self.time_step * self.output_channel)
-                )
-            elif i == 1:
-                self.GLUs.append(
-                    GLU(self.time_step * self.output_channel, self.time_step * self.output_channel)
-                )
-                self.GLUs.append(
-                    GLU(self.time_step * self.output_channel, self.time_step * self.output_channel)
-                )
-            else:
-                self.GLUs.append(
-                    GLU(self.time_step * self.output_channel, self.time_step * self.output_channel)
-                )
-                self.GLUs.append(
-                    GLU(self.time_step * self.output_channel, self.time_step * self.output_channel)
-                )
-
-    def call(self, x):
-        # x should be (b, seq_len, units)
-        x = tf.keras.layers.Permute((2, 1))(x)
-        ffted = tf.signal.fft(tf.cast(x, dtype=tf.complex64))  # (b, units, seq_len)
-        real = tf.math.real(ffted)  # [b, units, seq_len]
-        img = tf.math.imag(ffted)
-        for i in range(3):
-            real = self.GLUs[i * 2](real)
-            img = self.GLUs[2 * i + 1](img)
-
-        time_step_as_inner = tf.dtypes.complex(real, img)
-        iffted = tf.signal.ifft(time_step_as_inner)  # [b, k, node_cnt, 48]
-        iffted = tf.cast(iffted, dtype=tf.float32)
-        iffted = tf.keras.layers.Permute((2, 1))(iffted)
-        return iffted
diff --git a/notebooks/deepmc/notebook_lib/utils.py b/notebooks/deepmc/notebook_lib/utils.py
deleted file mode 100644
index eacec1aa..00000000
--- a/notebooks/deepmc/notebook_lib/utils.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from datetime import datetime, timedelta
-from typing import Any, Dict, List
-
-import numpy as np
-import pandas as pd
-from numpy._typing import NDArray
-from pandas.tseries.offsets import DateOffset
-from sklearn.preprocessing import StandardScaler
-
-
-def get_csv_data(
-    path: str,
-    date_attribute: str = "date",
-    columns_rename: Dict[str, str] = {},
-    frequency: str = "60min",
-):
-    """
-    Read data from CSV file using Pandas python package.
-    """
-
-    data_df = pd.read_csv(path)
-    data_df[date_attribute] = pd.to_datetime(data_df[date_attribute])
-
-    if columns_rename:
-        data_df.rename(columns=columns_rename, inplace=True)
-
-    # apply index on date
-    data_df.reset_index(drop=True, inplace=True)
-    data_df.set_index(date_attribute, inplace=True)
-    data_df.sort_index(ascending=True, inplace=True)
-
-    # interpolate to derive missing data
-    data_df = data_df.interpolate(method="from_derivatives")
-    assert data_df is not None, "Interpolate deleted all data"
-    data_df = data_df.dropna()
-
-    # Group rows by frequency, requires date attribute indexed to execute this
-    data_df = data_df.fillna(method="ffill")
-    data_df = data_df.fillna(method="bfill")
-    data_df = data_df.groupby(pd.Grouper(freq=frequency)).mean()
-    data_df = data_df.fillna(method="ffill")
-    data_df = data_df.fillna(method="bfill")
-
-    return data_df
-
-
-def hour_round(t: datetime):
-    # Rounds to nearest hour by adding a timedelta hour if minute >= 30
-    return t.replace(second=0, microsecond=0, minute=0, hour=t.hour) + timedelta(
-        hours=t.minute // 30
-    )
-
-
-def get_split_scaled_data(data: pd.DataFrame, out_feature: str, split_ratio: float = 0.92):
-    split = int(split_ratio * data.shape[0])
-
-    train_data = data.iloc[:split]
-    test_data = data.iloc[split:]
-
-    output_scaler = StandardScaler()
-    output_scaler.fit_transform(np.expand_dims(data[out_feature].values, axis=1))  # type: ignore
-
-    train_scaler = StandardScaler()
-    train_scale_df = pd.DataFrame(
-        train_scaler.fit_transform(train_data), columns=train_data.columns, index=train_data.index
-    )
-    test_scale_df = pd.DataFrame(
-        train_scaler.transform(test_data), columns=test_data.columns, index=test_data.index
-    )
-
-    return train_scaler, output_scaler, train_scale_df, test_scale_df
-
-
-def shift_index(ds_df: pd.DataFrame, freq_minutes: int, num_indices: int, dateColumn: str = "date"):
-    ds_df[dateColumn] = ds_df.index.shift(-num_indices, freq=DateOffset(minutes=freq_minutes))
-    ds_df = ds_df.reset_index(drop=True)
-    ds_df = ds_df.set_index(dateColumn)
-    return ds_df
-
-
-def clean_relevant_data(
-    actual_df: pd.DataFrame,
-    forecast_df: pd.DataFrame,
-    out_variables: List[str],
-    freq_hours: int,
-    num_of_indices: int,
-):
-    base_data_df = actual_df.copy()
-    current_ws_df = forecast_df.add_suffix("Current")
-    base_data_df = base_data_df.join(current_ws_df)
-    shift_forecast_df = shift_index(forecast_df, freq_hours * 60, num_of_indices)
-    base_data_df = base_data_df.join(shift_forecast_df)
-
-    base_data_df = base_data_df[out_variables]
-    base_data_df = base_data_df.interpolate(method="from_derivatives")
-    assert base_data_df is not None, "Interpolate deleted all data"
-    base_data_df = base_data_df.dropna()
-    return base_data_df
-
-
-def smooth(y: NDArray[Any], box_pts: int):
-    box = np.ones(box_pts) / box_pts
-    y_smooth = np.convolve(y, box, mode="same")
-    return y_smooth
diff --git a/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml b/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml
new file mode 100644
index 00000000..834e34c9
--- /dev/null
+++ b/notebooks/deepmc_neighbors/deepmc_neighbors_env.yaml
@@ -0,0 +1,19 @@
+name: deepmc-pytorch-neighbors
+channels:
+  - pyg
+  - conda-forge
+  - defaults
+dependencies:
+  - python=3.9.*
+  - pip~=21.2.4
+  - pip:
+    - geopandas~=0.9.0
+    - einops~=0.6.0
+    - geopy~=2.4.1
+    - ipykernel~=6.17.1
+    - unfoldNd~=0.2.0
+    - pyWavelets~=1.3.0
+    - pydantic~=1.10.12
+    - matplotlib~=3.9.0
+    - ../../src/vibe_core
+    - ../../src/vibe_notebook
\ No newline at end of file
diff --git a/notebooks/deepmc_neighbors/gnn_forecast.ipynb b/notebooks/deepmc_neighbors/gnn_forecast.ipynb
new file mode 100644
index 00000000..75834edc
--- /dev/null
+++ b/notebooks/deepmc_neighbors/gnn_forecast.ipynb
@@ -0,0 +1,643 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Micro Climate Predictions with Nearby Weather Stations\n",
+    "\n",
+    "This notebook helps infer weather forecasts for stations that have no data or limited data by utilizing data from neighboring stations. It demonstrates how to configure inputs and train a model using data from neighboring weather stations.\n",
+    "\n",
+    "This is an extension of the deepmc notebook [notebooks/deepmc/mc_forecast.ipynb](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb).\n",
+    "\n",
+    "Before running this notebook, let's build a micromamba environment. If you do not have micromamba installed, please follow the instructions from the [micromamba installation guide](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html).\n",
+    "\n",
+    "```bash\n",
+    "$ micromamba env create -f ./deepmc_neighbors_env.yaml\n",
+    "$ micromamba activate deepmc-pytorch-neighbors\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Install Packages**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install torch==1.12.1 --index-url https://download.pytorch.org/whl/cpu\n",
+    "! pip install torch-scatter==2.1.0 torch-sparse==0.6.15 torch-geometric==2.3.0 -f https://data.pyg.org/whl/torch-1.12.1%2Bcpu.html\n",
+    "! pip install torch-geometric-temporal~=0.54.0 onnxruntime~=1.15.0 pytorch-lightning~=1.8.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook overview\n",
+    "In this notebook, we describe the steps to generate forecasts for weather variables at a specific station with no or limited data. We employ [Graph Neural Networks (GNNs)](https://pytorch-geometric.readthedocs.io/) for cross-learning from nearby weather stations by capturing spatial relationships. \n",
+    "\n",
+    "To illustrate this approach, we focus on three locations in Washington state, U.S.A., utilizing data accessible through [AGWeatherNet](https://weather.wsu.edu/). An example is shown in the figure below. For instance, assuming that the Warden SW station has missing data, we look to neighboring stations (such as Royal Slope and Ringold) that provide relevant data. We consider the weather variables temperature, humidity and wind_speed.\n",
+    "\n",
+    "\n",
+    "\n",
+    "Selecting appropriate neighboring stations is crucial for accurate predictions. When choosing neighboring weather stations, consider the following factors:\n",
+    "\n",
+    "- Elevation Similarity: In the current model, the neighboring stations should be at a similar elevation to the target station. This ensures that altitude-related effects are consistent. However, one can build an edge weight model that includes the altitude differential to account for the topography (this notebook does not cover that). \n",
+    "\n",
+    "- Spatial Proximity: The distance between neighboring stations should be small. Proximity often implies similar local weather patterns. In this example, we chose stations with a distance of less than 25 km between them. In our experiments, we noticed significant errors with distances greater than 25 km.\n",
+    "\n",
+    "**Graph Representation of Weather Stations for GNNs**\n",
+    "\n",
+    "Each weather station corresponds to a node in our graph. To capture the relationships between stations, we connect stations based on the distance between them. This graph does not change with time during inference. If a new station is available which can be helpful to increase accuracy, then the model can be dynamically updated by recomputing & retraining the GNN.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following steps are required for model training and inference.\n",
+    "\n",
+    "**Step 1: Download AgWeatherNet data**\n",
+    "- Download historical weather data for the stations Royal Slope and Ringold from [AGWeatherNet]( https://weather.wsu.edu/) for the time range of interest (minimum 2 years of data).\n",
+    "- Clean downloaded historical data for considered variables temperature, humidity and wind_speed. \n",
+    "\n",
+    "Note: these two steps are not included in the notebook. See [sample data](sample_data.csv) for an example. \n",
+    "\n",
+    "**Step 2: Download forecast data**\n",
+    "- Download HRRR data for the stations Warden SW, Royal Slope and Ringold using herbie_forecast workflow in Farmvibes for the time range of interest (minimum 2 years of data).\n",
+    "- Clean downloaded HRRR data for considered variables temperature, humidity and wind_speed.\n",
+    "\n",
+    "**Step 3: Train DeepMC models**\n",
+    "- For stations Royal Slope and Ringold, train the DeepMC model using the notebook [notebooks/deepmc/mc_forecast.ipynb]( https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb). You will need to train separately for each station.\n",
+    "- The DeepMC inference results are weather forecasts for the next 24 hours for the stations Royal Slope and Ringold.\n",
+    "\n",
+    "**Step 4: Preparation for GNN model training**\n",
+    "- Create embeddings: Concatenate the cleaned HRRR weather forecast data of station Warden SW with the DeepMC inference results of stations Royal Slope and Ringold.\n",
+    "- Create train and test splits from the embeddings.\n",
+    "- Train GNN model.\n",
+    "\n",
+    "**Step 5: Inference**\n",
+    "\n",
+    "Run the inference to infer weather forecasts for the Warden SW station.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook Setup\n",
+    "\n",
+    "Let's start by importing the required packages and defining some constants.\n",
+    "\n",
+    "### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import warnings\n",
+    "from datetime import datetime\n",
+    "\n",
+    "from notebook_lib.post_deepmc_inference import download_forecast_data\n",
+    "from notebook_lib.train import MC_Neighbors\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Constants\n",
+    "- ROOT_DIR - Root directory of DeepMC output.\n",
+    "- WEATHER_TYPE - temperature, humidity, or wind_speed.\n",
+    "- INFERENCE_STATION - Station having missing weather data.\n",
+    "- MODEL_TYPE - relevant or not-relevant"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ROOT_DIR = \"\"\n",
+    "WEATHER_TYPE = \"temperature\"\n",
+    "INFERENCE_STATION = \"Warden_SW\"\n",
+    "MODEL_TYPE = \"relevant\"\n",
+    "ROOT_PATH = os.path.join(ROOT_DIR, WEATHER_TYPE)\n",
+    "\n",
+    "# Forecast data\n",
+    "infer_forecast_data_path = f\"{ROOT_PATH}/{INFERENCE_STATION}/{MODEL_TYPE}/forecast.csv\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Download stations data.  \n",
+    "Here, we are taking the stations from [AGWeatherNet](https://weather.wsu.edu/). \n",
+    "\n",
+    "We are assuming that the station Warden_SW does not have the weather station data. We consider the stations Royal Slope and Ringold as neighboring weather stations having similar weather patterns, hence historical data download is required for these two stations. See [sample data](sample_data.csv) for an example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Neighboring stations\n",
+    "# Coordinates are in (longitude, latitude)\n",
+    "neighbor_stations = [\n",
+    "    {\n",
+    "        \"name\": \"Warden_SW\",\n",
+    "        \"column_name\": \"temperature_forecast\",\n",
+    "        \"coordinates\": (-119.12, 46.93),\n",
+    "    },\n",
+    "    {\n",
+    "        \"name\": \"royal_slope\",\n",
+    "        \"column_name\": \"temperature\",\n",
+    "        \"coordinates\": (-119.32, 46.95),\n",
+    "    },\n",
+    "    {\n",
+    "        \"name\": \"ringold\",\n",
+    "        \"column_name\": \"temperature\",\n",
+    "        \"coordinates\": (-119.18, 46.48),\n",
+    "    },\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Download Forecast data\n",
+    "\n",
+    "For weather station Warden SW, download weather forecast observations by submitting a request to the worker running in the background. The workflow uses the parameters below while processing requests; they can be overridden using the parameter argument.\n",
+    "\n",
+    "- fxx: [1, 25, 1] # start, stop, step\n",
+    "- search_text: \"TMP:2 m\"\n",
+    "- interval: 60 # in minutes\n",
+    "- weather_type: \"temperature\"\n",
+    "- multi_threads: 25"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_date = datetime(year=2021, month=7, day=30)\n",
+    "end_date = datetime(year=2023, month=8, day=2)\n",
+    "forecast_data = download_forecast_data([neighbor_stations[0]], start_date, end_date)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "forecast_data[\"Warden_SW\"].to_csv(infer_forecast_data_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Train DeepMC models\n",
+    "\n",
+    "Complete the DeepMC model training using the notebook [notebooks/deepmc/mc_forecast.ipynb](https://github.com/microsoft/farmvibes-ai/blob/main/notebooks/deepmc/mc_forecast.ipynb) for weather stations Royal Slope and Ringold.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4: Train [Graph Neural Network (GNN)](https://pytorch-geometric.readthedocs.io/) model\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4.1 Create embeddings\n",
+    "\n",
+    "The get_embeddings module does the following: \n",
+    "1. Runs inference using the trained DeepMC model to obtain temperature forecasts for the Royal Slope and Ringold weather stations.\n",
+    "2. Pre-process inference results to create a lookback by transforming it to a 2D matrix.\n",
+    "3. Pre-process HRRR weather forecast to create a lookback by transforming it to a 2D matrix.\n",
+    "4. Creates embeddings by concatenating the pre-processed results. The embeddings are sorted by timestamp and station name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj_neighbors = MC_Neighbors(root_dir=ROOT_PATH, learning_rate=0.0025, use_edge_weights=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_embeddings, test_embeddings = obj_neighbors.get_embeddings(\n",
+    "    INFERENCE_STATION,\n",
+    "    neighbor_stations,\n",
+    "    24,\n",
+    "    infer_forecast_data_path,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4.2 Model training\n",
+    "\n",
+    "The GNN training script does the following:\n",
+    "\n",
+    "1. Creates Dataset that reads the input embeddings, creates a node for each timestamp, and creates edges connecting weather stations.\n",
+    "2. Creates BatchSampler to split data into batches for training and testing dataset.\n",
+    "3. Initiates model training using the PyTorch Lightning package."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj_neighbors.run_train(\n",
+    "    train_embeddings=train_embeddings,\n",
+    "    test_embeddings=test_embeddings,\n",
+    "    neighbor_stations=neighbor_stations,\n",
+    "    infer_station=INFERENCE_STATION,\n",
+    "    epochs=20,\n",
+    "    batch_size=24 * len(neighbor_stations),\n",
+    "    forecast_hours=24,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Run Inference to validate the trained model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_df = obj_neighbors.run_inference(\n",
+    "    embeddings=test_embeddings.copy(),\n",
+    "    neighbors_station=neighbor_stations,\n",
+    "    infer_station=INFERENCE_STATION,\n",
+    "    batch_size=len(neighbor_stations),\n",
+    "    forecast_hours=24,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- historical_data_path: it's a path to historical weather data downloaded and cleaned in Step 1.\n",
+    "- hrrr_data_path: it's a path to HRRR weather data downloaded and cleaned in Step 2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "historical_data_path = \"\"\n",
+    "hrrr_data_path = \"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "obj_neighbors.view_plot(pred_df, historical_data_path, hrrr_data_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GNN temperature\n", + "RMSE: 3.98\n", + "MAE: 3.19\n", + "MAE%: 4.22%\n", + "\n", + "Hrrr temperature\n", + "RMSE: 4.64\n", + "MAE: 3.8\n", + "MAE%: 4.91%\n" + ] + } + ], + "source": [ + "obj_neighbors.view_performance(pred_df, historical_data_path, hrrr_data_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Inference\n", + "For weather stations used in GNN model training, we will perform the steps below to get inference results.\n", + "\n", + "**5.1 Download data**\n", + "- Download historical weather data for the stations Royal Slope and Ringold from [AGWeatherNet]( https://weather.wsu.edu/) for the time range interested in.\n", + "\n", + "Note: To perform the inference for 24 hours with 60 minutes interval, the minimum data required for 528 hours, similarly If the data frequency is 15 minutes, the minimum number of data points required is 528*4 = 2112. These are the minimum number of data points need to be provided as input during the inference.\n", + "\n", + "**5.2 Preprocessing**\n", + "- For each weather station, historical and HRRR data are concatenated by timestamp.\n", + "- Data processing is done using Wavelet Transformation techniques. \n", + "- For each weather station, using the trained DeepMC model, we run the inference to find weather forecasts.\n", + "- Embeddings are created by combining HRRR data and the predicted weather forecasts.\n", + "\n", + "**5.3 Run GNN model inference**\n", + "\n", + "Finally, we plot the results and calculate KPIs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.1 Download data\n", + "\n", + "Download AgWeatherNet data and clean it. 
See [sample data](sample_data.csv)." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# weather dataset filtered and model training limited to train features.\n", + "HISTORICAL_MODEL_TRAIN_FEATURES = [\"humidity\", \"wind_speed\", \"temperature\"]\n", + "\n", + "# Historical data aligned using INDEX variable\n", + "INDEX = \"date\"\n", + "\n", + "# weather dataset filtered and model training limited to train features.\n", + "FORECAST_MODEL_TRAIN_FEATURES = [\"humidity_forecast\", \"wind_speed_forecast\", \"temperature_forecast\"]\n", + "\n", + "# Models trained to predict out features\n", + "OUT_FEATURES = [\"temperature\"] # ['wind_speed' , 'temperature']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Get actual observations data for a station, '%s' is a place holder for station name.\n", + "file_path = f\"/%s/prediction.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# time range interested in\n", + "start_date = datetime(year=2022, month=7, day=1, hour=0, minute=0, second=0)\n", + "end_date = datetime(year=2022, month=8, day=15, hour=0, minute=0, second=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution status for temperature: done\n", + "Execution status for humidity: done\n", + "Execution status for u-component: done\n", + "Execution status for v-component: done\n" + ] + } + ], + "source": [ + "forecast_data = download_forecast_data(neighbor_stations, start_date, end_date)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.2 Preprocessing\n", + "\n", + "Steps to derive embeddings:\n", + "\n", + "- We perform wavelet transformation on selected weather variables (historical and forecast data). 
\n", + "- The preprocessed output is used as input to run the inference using the DeepMC trained model. The inference results are weather forecasts for neighboring stations.\n", + "- The DeepMC inference results are concatenated with HRRR forecast data to create embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "obj_neighbors = MC_Neighbors(root_dir=ROOT_PATH, learning_rate=0.0025, use_edge_weights=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = obj_neighbors.get_embeddings_inference(\n", + " INFERENCE_STATION,\n", + " neighbor_stations,\n", + " 24,\n", + " infer_forecast_data_path,\n", + " OUT_FEATURES,\n", + " file_path,\n", + " forecast_data,\n", + " start_date,\n", + " end_date,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.3 Run inference\n", + "\n", + "The inference results are weather forecast for stations that are missing station data." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "pred_df = obj_neighbors.run_inference(\n", + " embeddings=embeddings.copy(),\n", + " neighbors_station=neighbor_stations,\n", + " infer_station=INFERENCE_STATION,\n", + " batch_size=len(neighbor_stations),\n", + " forecast_hours=24,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.4 Plot results and calculate KPIs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- historical_data_path: it's a path to historical weather data downloaded and cleaned in Step 5.1.\n", + "- hrrr_data_path: it's a path to hrr weather data downloaded and cleaned in Step 5.1." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "historical_data_path = \"\"\n", + "hrrr_data_path = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "obj_neighbors.view_plot(pred_df, historical_data_path, hrrr_data_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GNN temperature\n", + "RMSE: 3.05\n", + "MAE: 2.57\n", + "MAE%: 3.48%\n", + "\n", + "Hrrr temperature\n", + "RMSE: 3.72\n", + "MAE: 3.02\n", + "MAE%: 4.05%\n" + ] + } + ], + "source": [ + "obj_neighbors.view_performance(pred_df, historical_data_path, hrrr_data_path)" + ] + } + ], + "metadata": { + "description": "It helps to find weather forecasts for sensors that have no data by utilizing data of neighboring stations", + "disk_space": "", + "kernelspec": { + "display_name": "dev-vibes3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + }, + "name": "Micro Climate Predictions using Neighbor stations", + "running_time": "", + "tags": [ + "Weather", + "Model Training" + ] + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/deepmc_neighbors/graph.svg b/notebooks/deepmc_neighbors/graph.svg new file mode 100755 index 00000000..53ae5c23 --- /dev/null +++ b/notebooks/deepmc_neighbors/graph.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py b/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py new file mode 100644 index 00000000..82488fd7 --- /dev/null +++ b/notebooks/deepmc_neighbors/notebook_lib/base_dataset.py @@ -0,0 +1,318 @@ +from math import cos, sin +from typing import Any, Dict, List, Union + +import geopy.distance +import numpy as np +import pandas as pd +import torch +import torch.utils +import torch.utils.data 
+from sklearn.preprocessing import StandardScaler +from torch import Tensor +from torch.utils.data import IterableDataset + + +class GNNDataset: + def __init__( + self, + data: pd.DataFrame, + scaler_input: StandardScaler, + scaler_label: StandardScaler, + neighbor_station: Dict[str, Any], + infer_station: str, + forecast_hours: int, + label_column_index: Union[int, None], + forecast_step: int = 0, + device_count: int = torch.cuda.device_count(), + ): + super().__init__() + self.data = data + self.forecast_step = forecast_step + self.device_count = device_count + self.scaler_input = scaler_input + self.scaler_label = scaler_label + self.neighbor_stations = neighbor_station + self.stations_count = len(self.neighbor_stations["stations"]) + self.infer_station = infer_station + self.forecast_hours = forecast_hours + self.label_column_index = label_column_index + self.load_nodes() + self.load_edges() + + def load_node_labels(self, data: pd.DataFrame): + if "labels" not in data.columns: + return data + + node_labels = data["labels"].to_numpy() + node_labels = node_labels.reshape(-1)[ + : int(len(data.index.get_level_values(0)) / self.node_num) * self.node_num * 1 + ] + + self.node_labels = torch.from_numpy( + node_labels.reshape( + int(len(data.index.get_level_values(0)) / self.node_num), + self.node_num, + 1, + ).astype("float32") + ) + data.drop(columns=["labels"], inplace=True) + return data + + def load_nodes(self): + data = self.node_feature_selection(self.data) + data["timestamp"] = [pd.Timestamp(a).replace(tzinfo=None) for a in data["timestamp"]] + data = data.rename(columns={"station": "Node"}) + self.node_names = data["Node"].unique().astype(str) + self.node_num = len(self.node_names) + data.set_index(["timestamp", "Node"], inplace=True) + data = self.load_node_labels(data) + data.drop(columns=["forecast_step"], inplace=True) + + # Set node variables + self.lookback_indices = list(range(self.forecast_hours)) + self.target_idx = self.forecast_step + 
def get_from_to_nodes(self, neighbor_stations: Dict[str, Any]):
    """Enumerate the directed edges of the station graph.

    Every station other than ``self.infer_station`` is linked to every other
    station, so no edge ever originates from the station being inferred.

    :param neighbor_stations: Dictionary whose ``"stations"`` entry lists the
        station names.

    :return: Two parallel lists ``(from_node, to_node)``.
    """
    # Use the argument for both loops; the original mixed the argument with
    # ``self.neighbor_stations``, which only worked because callers pass the
    # same dictionary.
    stations = neighbor_stations["stations"]
    from_node = []
    to_node = []
    for source in stations:
        if source == self.infer_station:
            continue
        for target in stations:
            if source != target:
                from_node.append(source)
                to_node.append(target)
    return from_node, to_node


def get_edges(self, neighbor_stations: Dict[str, Any]):
    """Compute the geometry of every edge of the station graph.

    :param neighbor_stations: Dictionary with a ``"stations"`` list and a
        ``"long_lat"`` mapping from station name to its coordinate pair.

    :return: Dictionary of parallel lists keyed by ``from_node``, ``to_node``,
        ``distance`` (geodesic distance in km), ``dir_x`` and ``dir_y``
        (the two components of the initial-bearing formula evaluated between
        the edge's end points).
    """
    # The module only imports ``cos`` and ``sin`` from ``math``.
    from math import radians

    from_node, to_node = self.get_from_to_nodes(neighbor_stations)
    coords = neighbor_stations["long_lat"]
    distances = []
    turbine_dir_x = []
    turbine_dir_y = []

    for source, target in zip(from_node, to_node):
        # The pairs are reversed before being handed to geopy, which expects
        # (latitude, longitude) in degrees.
        coord_1 = coords[source][::-1]
        coord_2 = coords[target][::-1]
        distances.append(geopy.distance.geodesic(coord_1, coord_2).km)

        # ``math.cos``/``math.sin`` work in radians; feeding them the raw
        # degree values (as before) produced meaningless components.
        lat1, lon1 = (radians(value) for value in coord_1)
        lat2, lon2 = (radians(value) for value in coord_2)
        turbine_dir_x.append(cos(lat1) * sin(lon1 - lon2))
        turbine_dir_y.append(
            cos(lat2) * sin(lat1) - sin(lat2) * cos(lat1) * cos(lon1 - lon2)
        )

    data = {
        "from_node": from_node,
        "to_node": to_node,
        "distance": distances,
        "dir_x": turbine_dir_x,
        "dir_y": turbine_dir_y,
    }
    return data
"distance"] + data.drop(columns=["from_node", "to_node"], inplace=True) + edge_names = sorted(data["edge"].unique()) + node2id = dict(zip(self.node_names, range(len(self.node_names)))) + edge_index = [ + [node2id[src_node], node2id[tgt_node]] + for src_node, tgt_node in [edge.split("->") for edge in edge_names] + ] + + edge_df = data[["distance", "edge"]].set_index(["edge"]) + self.edge_names = edge_names + self.edge_feas = list(edge_df.columns) + self.edge_index = torch.LongTensor(edge_index) + self.edge_num = len(self.edge_names) + + self.edge_fea_dim = len(self.edge_feas) + self.edge_data = torch.from_numpy( + edge_df.values.reshape( + self.edge_num, + self.edge_fea_dim, + ).astype("float32") + ) + + def node_feature_selection(self, df_node: pd.DataFrame): + df_node = df_node.sort_values(["timestamp", "forecast_step", "station"]) + scaled_input_array = self.scaler_input.transform( + df_node.to_numpy()[:, 0 : self.forecast_hours] + ) + df_node.iloc[:, 0 : self.forecast_hours] = scaled_input_array # type: ignore + + if self.label_column_index is not None: + scaled_label = self.scaler_label.transform( + np.expand_dims(df_node.to_numpy()[:, self.label_column_index], axis=-1) + ) + df_node.iloc[:, self.label_column_index] = scaled_label # type: ignore + return df_node + + +class BatchSampler(IterableDataset): # type: ignore + def __init__( + self, + dataset: GNNDataset, + batch_size: int, + lookahead_horizon: int, + lookback_horizon: int, + device: Union[str, torch.device], + random: bool = True, + noise_parameters: Dict[str, Any] = {}, + use_edge_weights: bool = False, + ): + self.dataset = dataset + self.batch_size = batch_size + self.device_count = dataset.device_count + self.random = random + self.lookahead_horizon = lookahead_horizon + self.lookback_horizon = lookback_horizon + self.device = device + self.noise_parameters = noise_parameters + self.use_edge_weights = use_edge_weights + self.stations_count = dataset.stations_count + + def 
def get_batch_edge_index(self, cur_batch_size: int, num_devices: int):
    """Build the batched edge index and store it in ``self.edge_index``.

    The result has shape ``[cur_batch_size, 2, edge_num]``. Within each
    device's share of the batch, the node ids of the k-th graph are shifted
    by ``k * node_num`` so the graphs stay disjoint once they are flattened.

    :param cur_batch_size: Number of samples in the current batch.
    :param num_devices: Number of devices the batch is split over; ``0`` is
        treated as ``1``.
    """
    edge_num = self.dataset.edge_num
    node_num = self.dataset.node_num
    if num_devices == 0:
        num_devices = 1

    batch_size_each_device = cur_batch_size // num_devices

    # ``dataset.edge_index`` stores one (source, target) pair per row, i.e.
    # [edge_num, 2]. It has to be transposed to [2, edge_num]; reshaping it
    # directly (as before) interleaves sources and targets and yields edges
    # that do not exist in the graph.
    per_graph = self.dataset.edge_index.t().contiguous()
    edge_index = per_graph.unsqueeze(0).repeat(batch_size_each_device, 1, 1)

    # Add offset to edge_index
    offset = torch.arange(0, batch_size_each_device * node_num, node_num).view(-1, 1, 1)
    edge_index = edge_index + offset

    self.edge_index = torch.cat(num_devices * [edge_index]).reshape(
        cur_batch_size, 2, edge_num
    )


def get_batch_edge_data(self, cur_batch_size: int, num_devices: int):
    """Build the batched edge features and store them in ``self.edge_data``.

    The result has shape ``[cur_batch_size, edge_fea_dim, edge_num]`` and
    repeats the dataset's edge features for every sample of the batch.

    :param cur_batch_size: Number of samples in the current batch.
    :param num_devices: Number of devices the batch is split over; ``0`` is
        treated as ``1``.
    """
    edge_num = self.dataset.edge_num
    edge_fea_dim = self.dataset.edge_fea_dim
    if num_devices == 0:
        num_devices = 1
    batch_size_each_device = cur_batch_size // num_devices

    # ``dataset.edge_data`` is [edge_num, edge_fea_dim]; transpose so every
    # feature row lines up with the columns of the edge index.
    per_graph = self.dataset.edge_data.t().contiguous()
    edge_data = per_graph.unsqueeze(0).repeat(batch_size_each_device, 1, 1)

    # No node offset here: these are edge *features*, not node ids. The
    # previous version added the node-id offset to them, which changed the
    # edge weights of every sample but the first.
    self.edge_data = torch.cat(num_devices * [edge_data]).reshape(
        cur_batch_size, edge_fea_dim, edge_num
    )
range(num_batches): + lookback_indices = [] + batch_id_s = batch_id * self.batch_size + batch_id_e = batch_id_s + self.batch_size + forecast_indices = total_forecast_indices[batch_id_s:batch_id_e] + cur_batch_size = len(forecast_indices) + lookback_indices = forecast_indices + + # Collect meta data + forecast_timestamps = [self.dataset.timestamps[i] for i in forecast_indices] + + # Collect node-level time series + node_lookback = ( + self.dataset.node_data[lookback_indices] + .reshape(cur_batch_size, 1, self.dataset.node_num, self.dataset.node_fea_dim) + .transpose(1, 2) + .contiguous() + ) + + if self.dataset.label_column_index is not None: + # Collect node-level time series + node_lookback_labels = ( + self.dataset.node_labels[lookback_indices] + .reshape(cur_batch_size, 1, self.dataset.node_num, 1) + .transpose(1, 2) + .contiguous() + ) + else: + node_lookback_labels = None + + self.get_batch_edge_index(cur_batch_size, self.device_count) + self.get_batch_edge_data(cur_batch_size, self.device_count) + + batch = self.get_output(node_lookback, node_lookback_labels, forecast_timestamps) + + yield batch + + def get_output( + self, + node_lookback: Tensor, + node_lookback_labels: Union[Tensor, None], + forecast_timestamps: List[str], + ): + if self.use_edge_weights: + self.edge_data = torch.squeeze(self.edge_data.reshape(-1, 1)) + + self.edge_index = self.edge_index.permute(1, 0, 2).contiguous().view(2, -1) + # node_lookahead not implemented + # when we get it in the future, we will implement it + batch = {} + batch["node_data"] = node_lookback[:, :, :, :] + batch["edge_index"] = self.edge_index + batch["edge_data"] = self.edge_data + batch["forecast_timestamps"] = forecast_timestamps + + if node_lookback_labels is not None: + batch["node_labels"] = node_lookback_labels + + return list(batch.values()) + + def __iter__(self): + return iter(self.generate()) diff --git a/notebooks/deepmc_neighbors/notebook_lib/base_deepmc.py 
def inference_deepmc_post(
    model_path: str,
    post_data_x: List[NDArray[Any]],
):
    """Run the post-processing ONNX model of every prediction step.

    For step ``k`` the model stored at ``<model_path>/model_<k>/post/export.onnx``
    is fed ``post_data_x[k]`` (the same array for each of its inputs) and its
    first output is written to ``result[:, :, k]``.

    :param model_path: Directory that contains the ``model_<k>`` folders.
    :param post_data_x: One array per prediction step.

    :return: Array of shape ``[post_data_x[0].shape[0], n, n]`` where ``n`` is
        ``len(post_data_x)``.
    """
    inshape = len(post_data_x)
    mix_data_yhat = np.empty([post_data_x[0].shape[0], inshape, inshape])

    # ``pred_idx`` doubles as the output slot; no separate counter is needed.
    for pred_idx, train_yhat in enumerate(post_data_x):
        post_model_onnx_path = os.path.join(model_path, f"model_{pred_idx}", "post", "export.onnx")
        post_session = onnxruntime.InferenceSession(post_model_onnx_path, None)
        model_input = train_yhat.astype(np.float32)
        data_in = {model_in.name: model_input for model_in in post_session.get_inputs()}
        result = post_session.run(None, input_feed=data_in)[0]
        mix_data_yhat[:, :, pred_idx] = result
    return mix_data_yhat
List, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from torch.nn import Conv1d, Linear +from torch.utils.data import TensorDataset +from torch_geometric_temporal.nn.recurrent import TGCN + +from .schema import BatchTGCNInputs + + +def get_batch(batch: Union[Tensor, List[Tensor], TensorDataset], use_edge_weights: bool): + if isinstance(batch, TensorDataset): + batch = batch[:] + node_data = batch[0] + edge_index = batch[1] + # used for training + # skipped during inference + if len(batch) == 5: + node_labels = batch[4] + else: + node_labels = torch.tensor([]) + + if use_edge_weights: + edge_data = batch[2] + else: + edge_data = torch.tensor([]) + return node_data, edge_index, edge_data, node_labels + + +class BaseModule(nn.Module): + def __init__(self, problem_params: Dict[str, Any]): + super().__init__() + self.batch_size = problem_params["batch_size"] + self.lookback_horizon = problem_params["lookback_horizon"] + self.lookahead_horizon = problem_params["lookahead_horizon"] + + # node + self.num_nodes = problem_params["node_num"] + self.node_in_fea_dim = problem_params["node_in_fea_dim"] + self.node_out_fea_dim = problem_params["node_out_fea_dim"] + self.node_input_dim = self.lookback_horizon * self.node_in_fea_dim + self.node_output_dim = self.lookahead_horizon * self.node_out_fea_dim + self.use_dropout = problem_params["use_dropout"] + + # edge + self.edge_in_fea_dim = problem_params["edge_in_fea_dim"] + self.edge_out_fea_dim = problem_params["edge_out_fea_dim"] + self.edge_input_dim = self.lookback_horizon * self.edge_in_fea_dim + self.edge_output_dim = self.lookahead_horizon * self.edge_out_fea_dim + + # Add day and hour embeddings + self.day_em_dim = problem_params["day_em_dim"] + self.hour_em_dim = problem_params["hour_em_dim"] + # 7 days + self.day_em = nn.Embedding(7, self.day_em_dim) + # 24 hours + self.hour_em = nn.Embedding(24, self.hour_em_dim) + + # GRU hidden him + self.hidden_dim = 
class BatchTGCN(BaseModule):
    """TGCN encoder followed by a linear read-out for every node.

    ``forward`` returns the per-node forecast, the edge weights that were used
    (``None`` when edge weights are disabled) and the hidden embedding.
    """

    def __init__(
        self,
        inputs: BatchTGCNInputs,
    ):
        super().__init__(inputs.dict())
        self.inputs = inputs.dict()
        self.decoder_in_fea_dim = 2

        # Only ``tgcn_cell_encoder`` is used by ``forward``. The other cells
        # are kept on purpose: removing them would presumably change the
        # module's parameters and break loading of saved weights.
        self.tgcn_cell_encoder = TGCN(self.node_in_fea_dim, self.hidden_dim)
        self.tgcn_cell_encoder1 = TGCN(self.node_in_fea_dim, self.hidden_dim)

        self.tgcn_cell_decoder = TGCN(self.decoder_in_fea_dim, self.hidden_dim)
        self.tgcn_cell_decoder1 = TGCN(self.decoder_in_fea_dim, self.hidden_dim)
        # stopping loop reference
        self.get_batch = get_batch
        self.dropout_encoder1 = nn.Dropout(0.05)

    def forward(self, inputs: Union[Tensor, List[Tensor]]):
        """Encode a batch and project the embedding to the forecast horizon.

        :param inputs: Batch in the layout understood by ``get_batch``.

        :return: Tuple ``(h, e, hh)``: the forecast reshaped to
            ``[batch_size, num_nodes, lookahead_horizon]``, the edge weights
            (or ``None``) and the hidden embedding reshaped to
            ``[batch_size, num_nodes, hidden_dim]``.
        """
        node_data, edge_index, edge_data, _ = get_batch(inputs, self.use_edge_weights)
        self.edge_index = edge_index  # 2, num_edges
        # Batch size and node count follow the data, not the configuration.
        self.batch_size, self.num_nodes, _, _ = node_data.shape
        hh, e = self.process(node_data, edge_data)
        # In-place ReLU: ``hh`` itself is rectified as well.
        h = F.relu_(hh)
        h = self.linear1_node(h)
        h = h.reshape(self.batch_size, self.num_nodes, self.lookahead_horizon)  # type: ignore
        hh = hh.reshape(self.batch_size, self.num_nodes, self.hidden_dim)  # type: ignore
        return h, e, hh

    def get_hidden_embedding(
        self,
        horizon: int,
        x: Tensor,
        edge_weights: Union[Tensor, None],
    ) -> Tuple[Tensor, Union[Tensor, None]]:
        """Run the encoder cell over ``horizon`` steps of ``x``.

        :param horizon: Number of steps to read along dimension 2 of ``x``.
        :param x: Node data; step ``i`` is read as ``x[:, :, i, :]``.
        :param edge_weights: Optional edge weights handed to the cell.

        :return: The embedding produced for the last step and ``edge_weights``.

        :raises ValueError: If ``horizon`` is smaller than 1 (there would be
            no embedding to return).
        """
        if horizon < 1:
            raise ValueError(f"horizon must be at least 1, got {horizon}")

        # The selected feature columns are the same for every step.
        indices_lookback = torch.tensor(self.inputs["lookback_indices"]).to(x.device)
        for i in range(horizon):
            step_input = torch.index_select(x[:, :, i, :], 2, indices_lookback)
            step_input = step_input.reshape(self.batch_size * self.num_nodes, -1)
            # NOTE(review): no previous hidden state is passed to the cell, so
            # only the last step contributes to the returned embedding.
            # Confirm this is intended if horizon > 1 is ever used.
            h = self.tgcn_cell_encoder(step_input, self.edge_index, edge_weights)
            h = F.relu(h)
            h = self.dropout_encoder1(h)
        return h, edge_weights

    def process(
        self,
        node_data: Tensor,
        edge_data: Tensor,
    ) -> Tuple[Tensor, Union[Tensor, None]]:
        """Apply the optional input dropout and compute the hidden embedding.

        :param node_data: Node data of the batch.
        :param edge_data: Edge weights; ignored unless ``use_edge_weights``
            is set.

        :return: Same tuple as ``get_hidden_embedding``.
        """
        horizon = self.lookback_horizon
        x = node_data

        if self.use_dropout:
            x = self.dropout(x)

        edge_weights = None
        if self.use_edge_weights:
            edge_weights = edge_data

        self.prev_input = x[:, :, -1, :horizon]
        h, e = self.get_hidden_embedding(horizon, x, edge_weights)
        return h, e
def training_step(self, train_batch: Union[Tensor, List[Tensor]], _): + _, _, _, node_labels = self.gnn.get_batch(train_batch, self.gnn.use_edge_weights) + y = node_labels + y_hat, _, _ = self.gnn(train_batch) + loss = self.loss(y_hat, y.reshape(y_hat.shape)) + self.log("train_loss/total", loss, on_epoch=True, prog_bar=True) + return loss + + def validation_step(self, validation_batch: Union[Tensor, List[Tensor]], _): + _, _, _, node_labels = self.gnn.get_batch(validation_batch, self.gnn.use_edge_weights) + y = node_labels + y_hat, _, _ = self.gnn(validation_batch) + loss = self.loss(y_hat, y.reshape(y_hat.shape)) + self.log("val_loss/total", loss, on_epoch=True, prog_bar=True) + return loss diff --git a/notebooks/deepmc_neighbors/notebook_lib/data_utils.py b/notebooks/deepmc_neighbors/notebook_lib/data_utils.py new file mode 100644 index 00000000..85ce7231 --- /dev/null +++ b/notebooks/deepmc_neighbors/notebook_lib/data_utils.py @@ -0,0 +1,241 @@ +import os +import pickle +from typing import Any, Dict, List, Tuple, Union + +import numpy as np +import pandas as pd +import torch +from numpy.typing import NDArray +from sklearn.preprocessing import StandardScaler +from torch import Tensor +from torch.utils.data import DataLoader, TensorDataset + +from vibe_notebook.deepmc.utils import transform_to_array + +from .base_dataset import BatchSampler, GNNDataset +from .base_modules import BatchTGCNTrain + + +def build_scaler(train_embeddings: pd.DataFrame, forecast_hours: int) -> StandardScaler: + train_data_scaler = StandardScaler() + train_data_scaler.fit(train_embeddings.to_numpy()[:, :forecast_hours]) + return train_data_scaler + + +def build_scaler_label( + train_embeddings: pd.DataFrame, labels_column: str +) -> Tuple[StandardScaler, int]: + index = -1 + for i, column in enumerate(train_embeddings.columns): + if column == labels_column: + index = i + + if index == -1: + raise ValueError(f"Labels column '{labels_column}' not found") + + train_label_scaler = 
def train_test_dataset(
    train_data: pd.DataFrame,
    test_data: pd.DataFrame,
    step: int,
    neighbors_station: Dict[str, Any],
    scaler_data: StandardScaler,
    scaler_label: StandardScaler,
    infer_station: str,
    labels_column_index: int,
    forecast_hours: int = 24,
) -> Tuple[GNNDataset, GNNDataset]:
    """Build the train and test ``GNNDataset`` with identical settings.

    :param train_data: Embeddings used for training.
    :param test_data: Embeddings used for testing.
    :param step: Forecast step handed to the datasets as ``forecast_step``.
    :param neighbors_station: Station description handed to the datasets.
    :param scaler_data: Scaler applied to the input columns.
    :param scaler_label: Scaler applied to the label column.
    :param infer_station: Name of the station to infer.
    :param labels_column_index: Position of the label column.
    :param forecast_hours: Number of forecast columns per row. Defaults to 24,
        the value that used to be hard-coded here.

    :return: Tuple ``(train_dataset, test_dataset)``.
    """
    # Both datasets must be configured identically; keep the settings in one
    # place so they cannot drift apart.
    shared_args: Dict[str, Any] = {
        "forecast_step": step,
        "scaler_input": scaler_data,
        "scaler_label": scaler_label,
        "neighbor_station": neighbors_station,
        "forecast_hours": forecast_hours,
        "infer_station": infer_station,
        "label_column_index": labels_column_index,
    }

    train_dataset = GNNDataset(train_data, **shared_args)
    test_dataset = GNNDataset(test_data, **shared_args)

    return (train_dataset, test_dataset)
def get_file(file_path: str) -> List[Any]:
    """Load and return the object pickled in ``file_path``.

    :param file_path: Path of a file written with ``pickle.dump``.

    :return: The unpickled object.

    :raises FileNotFoundError: If ``file_path`` does not exist. The message is
        the same as before, and the type is still an ``Exception`` subclass.
    """
    # Open directly instead of checking for existence first: this avoids the
    # window in which the file can disappear between the check and the open.
    try:
        with open(file_path, "rb") as f:
            # NOTE(review): unpickling can execute arbitrary code; only use
            # this on files produced by this project.
            return pickle.load(f)
    except FileNotFoundError as error:
        raise FileNotFoundError(f"File {file_path} not found") from error
edge_index = batch[1] + # considered for training + # skipped during inference + if len(batch) == 5: + node_labels = batch[4] + else: + node_labels = torch.tensor([]) + + if use_edge_weights: + edge_data = batch[2] + else: + edge_data = torch.tensor([]) + return node_data, edge_index, edge_data, node_labels + + +def smooth(y: List[float], box_pts: int): + box = np.ones(box_pts) / box_pts + y_smooth = np.convolve(y, box, mode="same") + return y_smooth + + +def get_split_data(split_data: NDArray[Any], timestamps: NDArray[Any], split_at_index: int): + split_by_index = [] + for i in range(split_at_index): + data_at_index = split_data[i::split_at_index][:, i] + timestamp_at_index = timestamps[i::split_at_index] + split_by_index.append( + pd.DataFrame(zip(timestamp_at_index, data_at_index), columns=["timestamp", "label"]) + ) + + split_data_df = pd.concat(split_by_index, axis=0, ignore_index=True) + split_data_df["timestamp"] = pd.to_datetime(split_data_df["timestamp"]) + split_data_df = split_data_df.sort_values(by="timestamp") + + return np.array(split_data_df["label"].values) + + +def preprocess_transform( + mix_data_yhat: NDArray[Any], + inference_hours: int, + dates_list: NDArray[Any], +): + init_start = 0 + data_list = [] + end = mix_data_yhat.shape[0] + for i in range(init_start, end, inference_hours): + for j in range(inference_hours): + data_list.append(mix_data_yhat[i, 0, j]) + + mix_data_yhat = transform_to_array(np.array(data_list))[: mix_data_yhat.shape[0]] + dates_list = dates_list[: mix_data_yhat.shape[0]] + return mix_data_yhat, dates_list diff --git a/notebooks/deepmc_neighbors/notebook_lib/embeddings.py b/notebooks/deepmc_neighbors/notebook_lib/embeddings.py new file mode 100644 index 00000000..012f8a4a --- /dev/null +++ b/notebooks/deepmc_neighbors/notebook_lib/embeddings.py @@ -0,0 +1,235 @@ +import os +from datetime import datetime +from typing import Any, Dict, List + +import pandas as pd + +from .data_utils import get_file + + +def 
def get_date(stations: Dict[str, Any], data_index: int = -2, date_type: int = 0):
    """Retrieves the start date or end date shared by all stations.

    For ``date_type=0`` the latest first timestamp among the stations is
    returned, for ``date_type=-1`` the earliest last timestamp, i.e. the
    bounds of the period covered by every station. For any other value the
    date of the first station is returned unchanged.

    :param stations: Dictionary with station name as key and values
        with collection of station information used to generate embeddings.

    :param data_index: It defines position of data in array.
        will use -2 for train, -1 for test, 1 for inference.

    :param date_type: 0 for start_date, -1 for end_date.

    :return: date.

    :raises ValueError: If ``stations`` is empty, or if the date of the first
        station cannot be parsed.
    """
    date_format = "%Y-%m-%d %H:%M:%S"
    if not stations:
        # Previously this leaked a bare StopIteration from ``next``.
        raise ValueError("At least one station is required to compute the date")

    first_values = next(iter(stations.values()))
    date = datetime.strptime(first_values[data_index][date_type], date_format)

    for station_values in stations.values():
        try:
            s_date = datetime.strptime(station_values[data_index][date_type], date_format)
        except (ValueError, TypeError, IndexError, KeyError) as e:
            # Best effort: a station with missing or malformed dates is
            # reported and ignored rather than aborting the whole run.
            print(e)
            continue

        if date_type == 0:
            # for start date
            date = max(date, s_date)
        elif date_type == -1:
            # for end date
            date = min(date, s_date)
    return date
def get_inference_embeddings(
    predict_out: Dict[str, Any],
    inference_hours: int,
    neighbor_stations: Dict[str, Any],
    start_date: datetime,
    end_date: datetime,
):
    """
    Build the embeddings table used for inference.

    :param predict_out: Dictionary with station name as key. Entry 0 of each
        value is the prediction array, entry 1 the matching timestamps.
    :param inference_hours: Number of hours to predict.
    :param neighbor_stations: Dictionary with stations and coordinates.
    :param start_date: Start date for embeddings (inclusive).
    :param end_date: End date for embeddings (inclusive).

    :return: One row per station and timestamp, sorted by ``forecast_step``
        and ``station``.
    """
    embeddings = []
    for station in neighbor_stations["stations"]:
        predictions = predict_out[station][0]
        df = pd.DataFrame(
            predictions.reshape(predictions.shape[0], predictions.shape[2]),
            columns=list(range(inference_hours)),
        )
        timestamps = predict_out[station][1]

        df["station"] = station
        df["timestamp"] = timestamps
        df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y-%m-%d %H:%M:%S")

        mask = (df["timestamp"] >= start_date) & (df["timestamp"] <= end_date)
        # ``reset_index`` returns a new frame, so the column added below is
        # not written to a slice of ``df`` (SettingWithCopyWarning before).
        df = df.loc[mask].reset_index(drop=True)
        df["forecast_step"] = df.index
        embeddings.append(df)

    df_embeddings = pd.concat(embeddings, axis=0)
    df_embeddings = df_embeddings.sort_values(by=["forecast_step", "station"])
    return df_embeddings


def process_embeddings(
    predict_out: Dict[str, Any],
    inference_hours: int,
    neighbor_stations: Dict[str, Any],
    start_date: datetime,
    end_date: datetime,
    data_index: int,
    label_index: int,
    timestamp_index: int,
):
    """
    Process embeddings for train or test data.

    :param predict_out: Dictionary with station name as key and values. It's output of deepmc post processing.
    :param inference_hours: Number of hours to predict.
    :param neighbor_stations: Dictionary with stations and coordinates.
    :param start_date: Start date for embeddings.
    :param end_date: End date for embeddings.
    :param data_index: Index of train or test data in predict_out. The pickle file
        generated by deepmc follows this index train=0, test=1
    :param label_index: Index of train or test labels in predict_out. The pickle file
        generated by deepmc follows this index train=2, test=3
    :param timestamp_index: Index of train or test timestamps in predict_out. The pickle file
        generated by deepmc follows this index train=4, test=5

    :return: One row per station and timestamp, sorted by ``forecast_step``
        and ``station``.
    """
    embeddings = []
    for station in neighbor_stations["stations"]:
        predictions = predict_out[station][data_index]
        df = pd.DataFrame(
            predictions.reshape(predictions.shape[0], predictions.shape[2]),
            columns=list(range(inference_hours)),
        )

        labels = predict_out[station][label_index]
        timestamps = predict_out[station][timestamp_index]

        # Labels and timestamps may differ in length; keep the common prefix.
        common_length = min(len(timestamps), len(labels))
        labels = labels[:common_length]
        timestamps = timestamps[:common_length]

        # The column order (station, labels, timestamp) is relied upon by
        # code that addresses columns by position.
        df["station"] = station
        df["labels"] = labels
        df["timestamp"] = timestamps

        df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y-%m-%d %H:%M:%S")

        mask = (df["timestamp"] >= start_date) & (df["timestamp"] <= end_date)
        # ``reset_index`` returns a new frame, so the column added below is
        # not written to a slice of ``df`` (SettingWithCopyWarning before).
        df = df.loc[mask].reset_index(drop=True)
        df["forecast_step"] = df.index

        embeddings.append(df)

    df_embeddings = pd.concat(embeddings, axis=0)
    df_embeddings = df_embeddings.sort_values(by=["forecast_step", "station"])
    return df_embeddings
def write_embeddings_input(
    embeddings_input_path: str,
    data_scaler: StandardScaler,
    mix_yhat: NDArray[Any],
    mix_train_yhat: NDArray[Any],
    mix_yc: NDArray[Any],
    mix_train_yc: NDArray[Any],
    train_y: NDArray[Any],
    test_y: NDArray[Any],
    train_dates_list: NDArray[Any],
    test_dates_list: NDArray[Any],
):
    """Inverse transform the model outputs and pickle them.

    The pickled list contains, in this order: ``mix_yhat``, ``mix_train_yhat``,
    ``mix_yc``, ``mix_train_yc``, the train labels, the test labels, the train
    dates and the test dates.

    :param embeddings_input_path: Destination file; its parent directory is
        created when missing and an existing file is overwritten.
    :param data_scaler: Scaler whose ``inverse_transform`` is applied to the
        predictions and labels.
    :param mix_yhat: Test predictions.
    :param mix_train_yhat: Train predictions.
    :param mix_yc: Test values; only index 0 of axis 1 is kept.
    :param mix_train_yc: Train values; only index 0 of axis 1 is kept.
    :param train_y: Train labels.
    :param test_y: Test labels.
    :param train_dates_list: Train dates; column 0 is kept.
    :param test_dates_list: Test dates; column 0 is kept.

    :return: Tuple ``(mix_yhat, mix_train_yhat, mix_yc, mix_train_yc,
        train_labels, test_labels)`` after the inverse transform.
    """

    def _inverse(values: NDArray[Any]) -> NDArray[Any]:
        # Undo the scaling and make sure the result is a plain ndarray.
        return np.array(data_scaler.inverse_transform(values))

    # Do all the array work before touching the file system: if a transform
    # fails, a previously written file is left untouched (it used to be
    # deleted and replaced by an empty file).
    mix_yhat = np.expand_dims(_inverse(mix_yhat[:, :]), axis=1)
    mix_yc = np.expand_dims(_inverse(mix_yc[:, 0, :]), axis=1)
    mix_train_yhat = np.expand_dims(_inverse(mix_train_yhat[:, :]), axis=1)
    mix_train_yc = np.expand_dims(_inverse(mix_train_yc[:, 0, :]), axis=1)
    train_dates_list = train_dates_list[:, 0]
    test_dates_list = test_dates_list[:, 0]
    train_labels = _inverse(np.rollaxis(train_y, 2, 1)[:, 0, :])[:, 0]
    test_labels = _inverse(np.rollaxis(test_y, 2, 1)[:, 0, :])[:, 0]

    # ``dirname`` is empty for a bare file name, and ``os.makedirs("")`` fails.
    p_path_dir = os.path.dirname(embeddings_input_path)
    if p_path_dir:
        os.makedirs(p_path_dir, exist_ok=True)

    # Mode "wb" truncates an existing file, so no explicit removal is needed.
    with open(embeddings_input_path, "wb") as f:
        pickle.dump(
            [
                mix_yhat,
                mix_train_yhat,
                mix_yc,
                mix_train_yc,
                train_labels,
                test_labels,
                train_dates_list,
                test_dates_list,
            ],
            f,
        )

    return mix_yhat, mix_train_yhat, mix_yc, mix_train_yc, train_labels, test_labels
def get_station_object(stations: List[Dict[str, Any]], infer_station_name: str):
    """
    Look up the inference station among the configured stations.

    :param stations: Station dictionaries carrying ``"name"`` and ``"column_name"`` keys.
    :param infer_station_name: Name of the station to find.

    :return: Tuple ``(name, column_name)`` of the first matching station.
    :raises Exception: If no station has the requested name.
    """
    for entry in stations:
        if entry["name"] != infer_station_name:
            continue
        return entry["name"], entry["column_name"]

    raise Exception(f"No station found with name {infer_station_name}")
embeddings_preprocess_forecast( + stations: List[Dict[str, Any]], + infer_station_name: str, + root_path: str, + input_data_path: str, + forecast_interval: int, + model_type: str, + column_name: str, +): + model_path = os.path.join(root_path, infer_station_name, model_type) + forecast_df = get_csv_data(input_data_path) + train_dates_list, test_dates_list = get_date_range( + stations, infer_station_name, root_path, model_type + ) + train_df = forecast_df[forecast_df.index.isin(train_dates_list[:, 0])] + test_df = forecast_df[forecast_df.index.isin(test_dates_list[:, 0])] + + train_dates_list = ( + train_df[forecast_interval:].index.strftime("%Y-%m-%d %H:%M:%S").tolist() # type: ignore + ) + test_dates_list = ( + test_df[forecast_interval:].index.strftime("%Y-%m-%d %H:%M:%S").tolist() # type: ignore + ) + + dump_forecast_output( + train_df, + test_df, + model_path, + column_name, + train_dates_list, + test_dates_list, + forecast_interval, + ) + + +def embeddings_preprocess_deepmc( + model_path: str, + inference_hours: int, +): + train_data_path = os.path.join(model_path, "train_data_dates.pkl") + ( + train_X, + train_y, + test_X, + test_y, + _, + output_scaler1, + _, + train_dates_list, + _, + test_dates_list, + ) = get_file(train_data_path) + + list_train_X = inference_deepmc(model_path, train_X, inference_hours) + list_test_X = inference_deepmc(model_path, test_X, inference_hours) + + # Train data deepmc inference Post-Processing + mix_train_yc = preprocess_post_deepmc_gt(list_train_X, train_y, inference_hours) + mix_train_yhat = inference_deepmc_post(model_path, list_train_X) + + # Test data deepmc inference Post-Processing + mix_yc = preprocess_post_deepmc_gt(list_test_X, test_y, inference_hours) + mix_yhat = inference_deepmc_post(model_path, list_test_X) + + mix_train_yhat, train_dates_list = preprocess_transform( + mix_train_yhat, inference_hours, train_dates_list + ) + mix_yhat, test_dates_list = preprocess_transform(mix_yhat, inference_hours, test_dates_list) 
def preprocess_post_deepmc_gt(
    post_data_x: List[NDArray[Any]], data_y: NDArray[Any], inference_hours: int
):
    """
    Build the ground-truth array that accompanies the DeepMC post-processing inputs.

    :param post_data_x: Per-model DeepMC outputs; only ``len(post_data_x)`` is used here.
    :param data_y: Target array; the last ``inference_hours`` entries along axis 0
        are dropped and its first two dimensions size the result.
    :param inference_hours: Number of trailing samples removed from ``data_y``.

    :return: Array of shape ``(data_y.shape[0] - inference_hours, data_y.shape[1],
        len(post_data_x))``.
    """
    # Trim the tail so the ground truth has the same number of rows as the result.
    data_y = data_y[: data_y.shape[0] - inference_hours]
    # np.empty leaves the buffer uninitialised.
    mix_data_gt = np.empty([data_y.shape[0], data_y.shape[1], len(post_data_x)])

    idx = 0
    for _, _ in enumerate(post_data_x):
        # NOTE(review): the right-hand side reads from ``mix_data_gt`` itself, which
        # is still uninitialised, and the values of ``data_y`` are never copied in.
        # This looks like it was meant to read a slice of ``data_y`` - confirm the
        # intended source/axis before relying on the returned values.
        mix_data_gt[:, :, idx] = mix_data_gt[:, idx, :]
        idx = idx + 1

    return mix_data_gt
def get_station_object(stations: List[Dict[str, Any]], infer_station_name: str):
    """
    Look up the inference station among the configured stations.

    :param stations: Station dictionaries carrying ``"name"`` and ``"column_name"`` keys.
    :param infer_station_name: Name of the station to find.

    :return: Tuple ``(name, column_name)`` of the first matching station.
    :raises Exception: If no station has the requested name.
    """
    for stations_dict in stations:
        if stations_dict["name"] == infer_station_name:
            return stations_dict["name"], stations_dict["column_name"]

    # Reaching this point means nothing matched, so raise unconditionally instead
    # of guarding on a local that can only ever be None here.
    raise Exception(f"No station found with name {infer_station_name}")
def inference_embeddings_preprocessing(
    infer_station_name: str,
    stations: List[Dict[str, Any]],
    root_path: str,
    infer_forecast_data_path: str,
    infer_interval: int,
    model_type: str,
    deepmc_inference_results: Dict[str, Any],
):
    """
    Produce the embeddings input of every station for inference.

    The inference station is fed from its forecast file, every other station
    from its DeepMC model located at ``<root_path>/<station>/<model_type>``.

    :param infer_station_name: Name of the station being inferred.
    :param stations: Station dictionaries (``"name"`` and ``"column_name"`` keys are read).
    :param root_path: Root folder containing one sub-folder per station.
    :param infer_forecast_data_path: Forecast CSV of the inference station.
    :param infer_interval: Forecast interval / inference hours handed to the helpers.
    :param model_type: Model sub-folder name below each station folder.
    :param deepmc_inference_results: DeepMC inference output keyed by station name.

    :return: Dictionary mapping each station name to its helper's result.
    """
    per_station = {}
    for entry in stations:
        name = entry["name"]
        station_model_dir = os.path.join(root_path, name, model_type)

        if name != infer_station_name:
            per_station[name] = embeddings_preprocess_deepmc(
                station_model_dir,
                infer_interval,
                deepmc_inference_results[name],
            )
            continue

        per_station[name] = embeddings_preprocess_forecast(
            stations,
            infer_station_name,
            infer_forecast_data_path,
            infer_interval,
            deepmc_inference_results,
            entry["column_name"],
        )

    return per_station
def get_historical_data(
    stations: List[Dict[str, Any]],
    historical_data_path: str,
    historical_dataset_features: List[str],
    inference_station: str,
):
    """
    Load the selected historical columns of every station except the inference one.

    :param stations: Station dictionaries; only the ``"name"`` key is read.
    :param historical_data_path: ``%``-style template that is formatted with the
        station name to obtain that station's CSV path.
    :param historical_dataset_features: Columns kept from each loaded frame.
    :param inference_station: Name of the station that is left out.

    :return: Dictionary mapping station name to its column-filtered frame.
    """
    frames = {}
    for entry in stations:
        name = entry["name"]
        if name == inference_station:
            continue

        csv_path = historical_data_path % name
        frame = utils.get_csv_data(path=csv_path, interpolate=False, fill_na=False)
        frames[name] = frame[historical_dataset_features]

    return frames
def run_deepmc_inference(
    root_path: str,
    model_type: str,
    out_features: List[str],
    stations: List[Dict[str, Any]],
    historical_data_path: str,
    hrrr_datasets: Dict[str, pd.DataFrame],
    start_date: datetime,
    end_date: datetime,
    inference_station: str,
    historical_dataset_features: List[str] = ["humidity", "wind_speed", "temperature"],
    forecast_dataset_features: List[str] = [
        "humidity_forecast",
        "wind_speed_forecast",
        "temperature_forecast",
    ],
    frequency_hour: int = 1,
    number_of_hours: int = 24,
    weather_inference_type: str = "temperature",
):
    """
    Run the DeepMC pre-processing step for every neighbor station.

    The historical and forecast data are first merged through
    ``concat_historical_forecast``; each resulting frame is then passed to that
    station's ``prediction.InferenceWeather.deepmc_preprocess``.

    :param root_path: Root folder containing one sub-folder per station.
    :param model_type: Model sub-folder name below each station folder.
    :param out_features: Features the DeepMC model predicts.
    :param stations: Station dictionaries.
    :param historical_data_path: ``%``-style path template of the historical CSV files.
    :param hrrr_datasets: Forecast frames keyed by station name.
    :param start_date: Start of the inference window.
    :param end_date: End of the inference window.
    :param inference_station: Station excluded from the DeepMC run.
    :param historical_dataset_features: Historical columns to use. The default
        list is shared between calls and is only read, never modified, here.
    :param forecast_dataset_features: Forecast columns to use (same remark).
    :param frequency_hour: Data frequency in hours.
    :param number_of_hours: Number of hours handed to the data cleaning step.
    :param weather_inference_type: Column being predicted.

    :return: Dictionary mapping station name to the ``deepmc_preprocess`` output.
    """
    historical_clean_dataset = concat_historical_forecast(
        stations,
        historical_data_path,
        hrrr_datasets,
        start_date,
        end_date,
        inference_station,
        historical_dataset_features,
        forecast_dataset_features,
        frequency_hour,
        number_of_hours,
        weather_inference_type,
    )

    inference_output = {}
    for station, clean_dataset in historical_clean_dataset.items():
        train_data_export_path = os.path.join(root_path, station, model_type, "train_data.pkl")

        # NOTE(review): ``relevant`` is always True even though ``model_type`` is
        # configurable - confirm this is intended for non-"relevant" models.
        weather_forecast = prediction.InferenceWeather(
            root_path=root_path,
            data_export_path=train_data_export_path,
            station_name=station,
            predicts=out_features,
            relevant=True,
        )

        # Honour the requested weather type; this used to be a hard-coded
        # "temperature", which silently ignored ``weather_inference_type``.
        inference_output[station] = weather_forecast.deepmc_preprocess(
            clean_dataset, weather_inference_type
        )

    return inference_output
class BatchTGCNInputs(BaseModel):
    """
    Pydantic model grouping the configuration values of a batch TGCN model.

    NOTE(review): the per-field comments below are inferred from the field
    names only - confirm them against the code that consumes this schema.
    """

    # Presumably the number of past / future time steps per sample.
    lookback_horizon: int
    lookahead_horizon: int
    # Presumably the graph size and the node feature dimensions.
    node_num: int
    node_in_fea_dim: int
    node_out_fea_dim: int
    # Presumably the edge feature dimensions and the edge count.
    edge_in_fea_dim: int
    edge_out_fea_dim: int
    edge_num: int
    use_edge_weights: bool
    # Presumably embedding sizes for day / hour features.
    day_em_dim: int
    hour_em_dim: int
    period: int
    batch_size: int
    use_dropout: bool
    hidden_dim: int
    device_count: int
    lookback_indices: List[int]
class MC_Neighbors:
    """
    Train and run the neighbor GNN models (one per forecast step) of a station.

    All artifacts are stored below
    ``<root_dir>/<infer_station>/<model_type>/gnn_models/<edge_weights|no_edge_weights>``.
    """

    def __init__(
        self,
        root_dir: str,
        hidden_dim: int = 528,
        lookahead_horizon: int = 1,
        lookback_horizon: int = 1,
        learning_rate: float = 0.001,
        use_dropout: bool = False,
        use_edge_weights: bool = False,
        device_type: str = "cpu",  # cuda, cpu
        labels_column: str = "labels",
        weather_type: str = "temperature",
        model_type: str = "relevant",
    ):
        """
        Initialize the MC_Neighbors.

        :param root_dir: Path to trained model and preprocessed files.
        :param hidden_dim: Hidden dimension handed to the model.
        :param lookahead_horizon: Number of hours to look ahead.
        :param lookback_horizon: Number of hours to look back.
        :param learning_rate: The learning rate of the model.
        :param use_dropout: Whether to use a dropout layer for model training.
        :param use_edge_weights: If True, consider the spatial distance between
            stations for model training.
        :param device_type: ``"cuda"`` or ``"cpu"``. CUDA is only used when it is
            requested and available; anything else falls back to the CPU.
        :param labels_column: The labels column of the dataset.
        :param weather_type: Variable the model predicts, e.g. temperature or wind_speed.
        :param model_type: relevant or not-relevant.
        """
        self.weather_type = weather_type
        self.root_dir = root_dir
        self.lookahead_horizon = lookahead_horizon
        self.lookback_horizon = lookback_horizon
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.use_dropout = use_dropout
        self.use_edge_weights = use_edge_weights
        self.labels_column = labels_column
        self.device = torch.device(
            device_type if device_type == "cuda" and torch.cuda.is_available() else "cpu"
        )
        self.model_type = model_type

    def gnn_output_dir(self, infer_station: str):
        """
        Return the folder holding the GNN artifacts of ``infer_station``.

        :param infer_station: Name of the inference station.
        :return: ``<root_dir>/<infer_station>/<model_type>/gnn_models/<variant>`` where
            the variant is ``edge_weights`` or ``no_edge_weights``.
        """
        edge_weights = "edge_weights" if self.use_edge_weights else "no_edge_weights"
        return os.path.join(
            self.root_dir,
            infer_station,
            self.model_type,
            "gnn_models",
            edge_weights,
        )

    def gnn_preprocess_file(self, infer_station: str):
        """
        Return the path of the file that stores the scalers of ``infer_station``.

        :param infer_station: Name of the inference station.
        :return: Path of ``pre_process_data_export.json`` inside the GNN output folder.
        """
        output_dir = self.gnn_output_dir(infer_station)
        return os.path.join(output_dir, "pre_process_data_export.json")

    def run_train(
        self,
        train_embeddings: pd.DataFrame,
        test_embeddings: pd.DataFrame,
        neighbor_stations: List[Dict[str, Any]],
        infer_station: str,
        epochs: int,
        batch_size: int,
        forecast_hours: int,
    ) -> None:
        """
        Fit the scalers, persist them and train one model per forecast step.

        :param train_embeddings: Embeddings used for training (and to fit the scalers).
        :param test_embeddings: Embeddings used for validation.
        :param neighbor_stations: Station dictionaries with ``"name"`` and ``"coordinates"``.
        :param infer_station: Name of the inference station.
        :param epochs: Maximum number of training epochs per model.
        :param batch_size: Batch size used by the samplers.
        :param forecast_hours: Number of forecast steps, i.e. models to train.
        """
        self.output_dir = self.gnn_output_dir(infer_station)
        stations = self.get_neighbor_stations(neighbor_stations)
        scaler_data = build_scaler(train_embeddings.copy(), forecast_hours)
        scaler_label, labels_column_index = build_scaler_label(
            train_embeddings.copy(), self.labels_column
        )

        # Always (re)write the scalers: the models below are retrained from scratch
        # on every call, and run_inference() reads the scalers back from this file.
        data_export_path = self.gnn_preprocess_file(infer_station)
        os.makedirs(os.path.dirname(data_export_path), exist_ok=True)
        write_to_file(data_export_path, data=[scaler_data, scaler_label, labels_column_index])

        self.initialize_train(
            train_embeddings,
            test_embeddings,
            stations,
            infer_station,
            epochs,
            batch_size,
            forecast_hours,
            scaler_data,
            scaler_label,
            labels_column_index,
        )

    def initialize_train(
        self,
        train_embeddings: pd.DataFrame,
        test_embeddings: pd.DataFrame,
        neighbors_station: Dict[str, Any],
        infer_station: str,
        epochs: int,
        batch_size: int,
        forecast_hours: int,
        scaler_data: StandardScaler,
        scaler_label: StandardScaler,
        labels_column_index: int,
    ):
        """
        Build the datasets, samplers and model of every forecast step and train them.

        :param train_embeddings: Embeddings used for training.
        :param test_embeddings: Embeddings used for validation.
        :param neighbors_station: Output of :meth:`get_neighbor_stations`.
        :param infer_station: Name of the inference station.
        :param epochs: Maximum number of training epochs per model.
        :param batch_size: Batch size used by the samplers.
        :param forecast_hours: Number of forecast steps, i.e. models to train.
        :param scaler_data: Scaler fitted on the input data.
        :param scaler_label: Scaler fitted on the labels.
        :param labels_column_index: Index of the labels column.
        """
        for step in range(forecast_hours):
            train_dataset, test_dataset = train_test_dataset(
                train_data=train_embeddings,
                test_data=test_embeddings,
                step=step,
                neighbors_station=neighbors_station,
                scaler_data=scaler_data,
                scaler_label=scaler_label,
                infer_station=infer_station,
                labels_column_index=labels_column_index,
            )

            train_sampler, test_sampler = get_batch_sample(
                train_dataset=train_dataset,
                test_dataset=test_dataset,
                batch_size=batch_size,
                lookahead_horizon=self.lookahead_horizon,
                lookback_horizon=self.lookback_horizon,
                device=self.device,
                use_edge_weights=self.use_edge_weights,
            )

            inputs = BatchTGCNInputs(
                **problem_params(
                    train_dataset,
                    batch_size,
                    self.lookback_horizon,
                    self.lookahead_horizon,
                    self.use_edge_weights,
                    self.use_dropout,
                    self.hidden_dim,
                    forecast_hours,
                )
            )
            model = BatchTGCNTrain(inputs, self.learning_rate)
            model.to(self.device)
            self.train_model(model, epochs, train_sampler, test_sampler, step)

    def train_model(
        self,
        model: BatchTGCNTrain,
        epochs: int,
        train_sampler: BatchSampler,
        test_sampler: BatchSampler,
        forecast_step: int,
    ):
        """
        Train one forecast-step model and export it to ONNX.

        Any previous content of ``<output_dir>/model_<forecast_step>`` is removed first.

        :param model: Model to train.
        :param epochs: Maximum number of training epochs.
        :param train_sampler: Sampler yielding the training batches.
        :param test_sampler: Sampler yielding the validation batches.
        :param forecast_step: Forecast step this model belongs to.
        """
        os.makedirs(self.output_dir, exist_ok=True)

        model_path = "{}/model_{}".format(self.output_dir, forecast_step)

        if os.path.exists(model_path):
            shutil.rmtree(model_path, ignore_errors=True)

        os.makedirs(model_path, exist_ok=True)

        # batch_size is set to None to avoid batch size in dataloader
        # batch_size is set when creating the sampler
        train_loader = DataLoader(train_sampler, batch_size=None, collate_fn=lambda x: x)
        val_loader = DataLoader(test_sampler, batch_size=None, collate_fn=lambda x: x)

        t_obj = pl.Trainer(
            logger=True,
            max_epochs=epochs,
            callbacks=[
                LearningRateMonitor(),
                ModelCheckpoint(
                    monitor="val_loss/total",
                    save_last=True,
                    dirpath=model_path,
                ),
            ],
        )
        t_obj.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
        export_to_onnx(
            model_path,
            model,
            train_loader,
            self.use_edge_weights,
            train_sampler.dataset.edge_num,
            train_sampler.dataset.stations_count,
        )

    def run_inference(
        self,
        embeddings: pd.DataFrame,
        neighbors_station: List[Dict[str, Any]],
        infer_station: str,
        batch_size: int,
        forecast_hours: int,
    ):
        """
        Run the exported ONNX models of all forecast steps on ``embeddings``.

        Batches smaller than ``batch_size`` are skipped with a warning.

        :param embeddings: Embeddings to run inference on.
        :param neighbors_station: Station dictionaries with ``"name"`` and ``"coordinates"``.
        :param infer_station: Name of the inference station.
        :param batch_size: Batch size used by the sampler.
        :param forecast_hours: Number of forecast steps, i.e. models to run.

        :return: Frame with the ``weather_type`` prediction and ``timestamp`` columns.
        """
        self.output_dir = self.gnn_output_dir(infer_station)
        stations = self.get_neighbor_stations(neighbors_station)
        scaler_data, scaler_label, labels_column_index = get_file(
            self.gnn_preprocess_file(infer_station)
        )

        pred_data = []
        for step in range(forecast_hours):
            dataset, sampler = self.get_infer_inputs(
                embeddings,
                stations,
                infer_station,
                batch_size,
                forecast_hours,
                step,
                None,
                scaler_data,
                scaler_label,
            )
            loader = DataLoader(sampler, batch_size=None, collate_fn=lambda x: x)
            # The model file only depends on the forecast step, not on the batch.
            onnx_file_path = "{}/model_{}/model_output.onnx".format(self.output_dir, step)
            for index, data in enumerate(loader):
                if data[0].shape[0] != batch_size:
                    warnings.warn(
                        f"""Data at step {step} batch index {index} is less than batch size.
                        It will be skipped from running inference."""
                    )
                    continue
                step_output = self.inference(onnx_file_path, data)[
                    :, dataset.infer_station_index
                ].squeeze()
                if step == 0:
                    # The first step allocates one result matrix per full batch;
                    # the following steps fill in their own column.
                    results = np.zeros((batch_size, forecast_hours))
                    results[:, step] = step_output
                    pred_data.append(results)
                else:
                    pred_data[index][:, step] = step_output

        pred_data = np.concatenate(pred_data, axis=0)
        # NOTE(review): predictions are un-scaled with ``scaler_data`` while
        # ``scaler_label`` is loaded but unused here - confirm this is intended.
        pred_data = scaler_data.inverse_transform(pred_data)
        timestamps = dataset.timestamps[: pred_data.shape[0]]
        pred_data = get_split_data(pred_data, timestamps, forecast_hours)  # type: ignore
        pred_data_df = pd.DataFrame(
            zip(pred_data, timestamps), columns=[self.weather_type, "timestamp"]
        )
        return pred_data_df

    def get_historical_data(self, data_path: str):
        """
        Load the historical CSV and expose its date index as a ``timestamp`` column.

        :param data_path: Path of the historical CSV file.
        :return: Frame whose ``date`` column has been renamed to ``timestamp``.
        """
        historical_data_df = get_csv_data(data_path)
        historical_data_df.reset_index(inplace=True)
        historical_data_df.rename(columns={"date": "timestamp"}, inplace=True)
        return historical_data_df

    def get_hrrr_data(
        self,
        data_path: str,
    ):
        """
        Load the HRRR forecast CSV, parsing ``date`` and renaming it to ``timestamp``.

        :param data_path: Path of the forecast CSV file.
        :return: Frame with a ``timestamp`` column.
        """
        df_node = pd.read_csv(data_path, parse_dates=["date"])
        df_node.rename(columns={"date": "timestamp"}, inplace=True)
        return df_node

    def get_infer_inputs(
        self,
        embeddings: pd.DataFrame,
        neighbors_station: Dict[str, Any],
        infer_station: str,
        batch_size: int,
        forecast_hours: int,
        step: int,
        labels_column_index: Union[int, None],
        scaler_data: StandardScaler,
        scaler_label: StandardScaler,
    ):
        """
        Build the dataset and the sequential (non-random) sampler of one forecast step.

        :param embeddings: Embeddings to run inference on.
        :param neighbors_station: Output of :meth:`get_neighbor_stations`.
        :param infer_station: Name of the inference station.
        :param batch_size: Batch size used by the sampler.
        :param forecast_hours: Number of forecast steps.
        :param step: Forecast step the inputs are built for.
        :param labels_column_index: Index of the labels column, or None.
        :param scaler_data: Scaler applied to the input data.
        :param scaler_label: Scaler applied to the labels.

        :return: Tuple ``(dataset, sampler)``.
        """
        dataset = GNNDataset(
            embeddings,
            forecast_step=step,
            scaler_input=scaler_data,
            scaler_label=scaler_label,
            neighbor_station=neighbors_station,
            forecast_hours=forecast_hours,
            infer_station=infer_station,
            label_column_index=labels_column_index,
        )

        sampler = BatchSampler(
            dataset=dataset,
            batch_size=batch_size,
            lookahead_horizon=self.lookahead_horizon,
            lookback_horizon=self.lookback_horizon,
            device=self.device,
            random=False,
            use_edge_weights=self.use_edge_weights,
        )

        return dataset, sampler

    def inference(self, onnx_file_path: str, data: torch.Tensor):
        """
        Run one batch through the ONNX model stored at ``onnx_file_path``.

        Only the inputs the ONNX graph declares are fed to the session.

        :param onnx_file_path: Path of the exported ONNX model.
        :param data: Batch produced by the sampler.
        :return: First output of the ONNX session.
        """
        session = onnxruntime.InferenceSession(onnx_file_path, None)
        node_data, edge_index, edge_data, _ = get_batch(data, self.use_edge_weights)

        available = {
            "node_data": node_data.numpy(),
            "edge_index": edge_index.numpy(),
            "edge_data": edge_data.numpy(),
        }

        inputs = {node.name: available[node.name] for node in session.get_inputs()}
        results = session.run(None, input_feed=inputs)[0]
        return results

    def get_embeddings(
        self,
        inference_station: str,
        neighbor_stations: List[Dict[str, Any]],
        inference_hours: int,
        infer_forecast_data_path: str,
    ):
        """
        Pre-process all stations and build the train and test embeddings.

        :param inference_station: Name of the inference station.
        :param neighbor_stations: Station dictionaries.
        :param inference_hours: Number of hours to infer.
        :param infer_forecast_data_path: Forecast CSV of the inference station.

        :return: Tuple ``(df_train_embeddings, df_test_embeddings)``.
        """
        initialize_embeddings_preprocessing(
            infer_station_name=inference_station,
            stations=neighbor_stations,
            root_path=self.root_dir,
            infer_forecast_data_path=infer_forecast_data_path,
            infer_interval=inference_hours,
            model_type=self.model_type,
        )

        df_train_embeddings, df_test_embeddings = create_embeddings(
            stations=neighbor_stations,
            inference_hours=inference_hours,
            root_path=self.root_dir,
            model_type=self.model_type,
        )

        return df_train_embeddings, df_test_embeddings

    def get_neighbor_stations(
        self,
        neighbor_stations: List[Dict[str, Any]],
    ):
        """
        Summarise the station list as names plus a name-to-coordinates mapping.

        :param neighbor_stations: Station dictionaries with ``"name"`` and ``"coordinates"``.
        :return: ``{"stations": [names...], "long_lat": {name: coordinates}}``.
        """
        return {
            "stations": [station["name"] for station in neighbor_stations],
            "long_lat": {
                station["name"]: station["coordinates"] for station in neighbor_stations
            },
        }

    def filter_data(
        self,
        df_inference: pd.DataFrame,
        df_historical: pd.DataFrame,
        df_forecast: pd.DataFrame,
    ):
        """
        Restrict the three frames to the time span and variable of the inference.

        The forecast column ``<weather_type>_forecast`` is renamed to
        ``<weather_type>`` so that all three frames share the same column name.
        None of the input frames is modified.

        :param df_inference: Inference results with ``timestamp`` and ``weather_type`` columns.
        :param df_historical: Historical observations with the same columns.
        :param df_forecast: Forecast data with ``timestamp`` and the forecast column.

        :return: Tuple ``(df_inference, df_historical, df_forecast)``, each reduced
            to the ``timestamp`` and ``weather_type`` columns.
        """
        start_date = df_inference["timestamp"].min()
        end_date = df_inference["timestamp"].max()
        columns = ["timestamp", self.weather_type]

        df_historical = df_historical[df_historical.timestamp.between(start_date, end_date)]
        df_historical = df_historical[columns]

        df_inference = df_inference[df_inference.timestamp.between(start_date, end_date)]
        df_inference = df_inference[columns]

        df_forecast = df_forecast[df_forecast.timestamp.between(start_date, end_date)]
        # Derive the forecast column from the configured weather type (it used to be
        # hard-coded to "temperature_forecast") and rename on a new frame rather
        # than in place on a filtered slice.
        df_forecast = df_forecast.rename(
            columns={f"{self.weather_type}_forecast": self.weather_type}
        )
        df_forecast = df_forecast[columns]

        return df_inference, df_historical, df_forecast

    def view_plot(
        self,
        df_inference: pd.DataFrame,
        historical_data_path: str,
        hrrr_data_path: str,
    ):
        """
        Plot the predictions against the ground truth and the HRRR forecast.

        :param df_inference: Inference results, e.g. the output of :meth:`run_inference`.
        :param historical_data_path: Path of the historical CSV file.
        :param hrrr_data_path: Path of the HRRR forecast CSV file.
        """
        df_historical = self.get_historical_data(historical_data_path)
        df_forecast = self.get_hrrr_data(hrrr_data_path)

        df_inference, df_historical, df_forecast = self.filter_data(
            df_inference, df_historical, df_forecast
        )

        timestamps = df_inference["timestamp"]
        y_hat = list(df_inference[self.weather_type].values)
        y = list(df_historical[self.weather_type].values)
        hrrr_data_y = list(df_forecast[self.weather_type].values)

        plt.figure(figsize=(18, 6))
        plt.plot(timestamps, smooth(y_hat, 2), label="Predict")
        plt.plot(timestamps, y, label="Ground Truth")
        plt.plot(timestamps, hrrr_data_y, label="HRRR", linestyle="--")
        plt.title("Comparison Ground Truth Vs Inference Results Vs HRRR")
        plt.legend()

    def view_performance(
        self,
        df_inference: pd.DataFrame,
        historical_data_path: str,
        hrrr_data_path: str,
    ):
        """
        Print the KPIs of the GNN predictions and of the HRRR forecast.

        :param df_inference: Inference results, e.g. the output of :meth:`run_inference`.
        :param historical_data_path: Path of the historical CSV file.
        :param hrrr_data_path: Path of the HRRR forecast CSV file.
        """
        df_historical = self.get_historical_data(historical_data_path)
        df_forecast = self.get_hrrr_data(hrrr_data_path)

        df_inference, df_historical, df_forecast = self.filter_data(
            df_inference, df_historical, df_forecast
        )

        y_hat = list(df_inference[self.weather_type].values)
        y = np.array(df_historical[self.weather_type].values)
        hrrr_data_y = list(df_forecast[self.weather_type].values)

        print("GNN ", self.weather_type)
        calculate_KPI(smooth(y_hat, 1), y)
        print("")
        print("Hrrr", self.weather_type)
        calculate_KPI(smooth(hrrr_data_y, 1), y)

    def get_embeddings_inference(
        self,
        inference_station: str,
        neighbor_stations: List[Dict[str, Any]],
        inference_hours: int,
        infer_forecast_data_path: str,
        out_features: List[str],
        historical_data_path: str,
        hrrr_datasets: Dict[str, pd.DataFrame],
        start_date: datetime,
        end_date: datetime,
        historical_dataset_featues: Union[List[str], None] = None,
        forecast_dataset_features: Union[List[str], None] = None,
        frequency_hour: int = 1,
        number_of_hours: int = 24,
        weather_inference_type: str = "temperature",
    ):
        """
        Run DeepMC on the neighbor stations and build the inference embeddings.

        :param inference_station: Name of the inference station.
        :param neighbor_stations: Station dictionaries.
        :param inference_hours: Number of hours to infer.
        :param infer_forecast_data_path: Forecast CSV of the inference station.
        :param out_features: Features the DeepMC model predicts.
        :param historical_data_path: ``%``-style path template of the historical CSV files.
        :param hrrr_datasets: Forecast frames keyed by station name.
        :param start_date: Start of the inference window.
        :param end_date: End of the inference window.
        :param historical_dataset_featues: Historical columns to use. Defaults to
            ``["humidity", "wind_speed", "temperature"]``.
        :param forecast_dataset_features: Forecast columns to use. Defaults to
            ``["humidity_forecast", "wind_speed_forecast", "temperature_forecast"]``.
        :param frequency_hour: Data frequency in hours.
        :param number_of_hours: Number of hours handed to the data cleaning step.
        :param weather_inference_type: Column being predicted.

        :return: Embeddings frame for inference.
        """
        # Fresh lists per call instead of mutable default arguments.
        if historical_dataset_featues is None:
            historical_dataset_featues = ["humidity", "wind_speed", "temperature"]
        if forecast_dataset_features is None:
            forecast_dataset_features = [
                "humidity_forecast",
                "wind_speed_forecast",
                "temperature_forecast",
            ]

        deepmc_results = run_deepmc_inference(
            self.root_dir,
            self.model_type,
            out_features,
            neighbor_stations,
            historical_data_path,
            hrrr_datasets,
            start_date,
            end_date,
            inference_station,
            historical_dataset_featues,
            forecast_dataset_features,
            frequency_hour,
            number_of_hours,
            weather_inference_type,
        )

        deepmc_post_results = inference_embeddings_preprocessing(
            infer_station_name=inference_station,
            stations=neighbor_stations,
            root_path=self.root_dir,
            infer_forecast_data_path=infer_forecast_data_path,
            infer_interval=inference_hours,
            model_type=self.model_type,
            deepmc_inference_results=deepmc_results,
        )

        df_embeddings = create_embeddings_inference(
            stations=neighbor_stations,
            inference_hours=inference_hours,
            deepmc_post_results=deepmc_post_results,
        )

        return df_embeddings
+2021-07-25 02:30:00,74.5,41.6,8.0 +2021-07-25 02:45:00,74.4,41.9,7.3 +2021-07-25 03:00:00,74.6,41.5,6.0 +2021-07-25 03:15:00,73.8,43.1,5.0 +2021-07-25 03:30:00,73.4,43.7,5.9 +2021-07-25 03:45:00,73.4,42.8,6.3 +2021-07-25 04:00:00,73.5,42.2,4.9 +2021-07-25 04:15:00,72.4,44.5,5.2 +2021-07-25 04:30:00,73.9,40.8,7.1 +2021-07-25 04:45:00,74.1,40.7,7.2 +2021-07-25 05:00:00,73.9,41.6,7.2 +2021-07-25 05:15:00,73.6,42.5,7.2 +2021-07-25 05:30:00,73.7,42.6,6.9 +2021-07-25 05:45:00,74.0,42.3,6.9 +2021-07-25 06:00:00,74.2,42.5,6.3 +2021-07-25 06:15:00,74.4,42.5,7.1 +2021-07-25 06:30:00,75.1,41.1,6.9 +2021-07-25 06:45:00,76.1,39.9,5.7 +2021-07-25 07:00:00,76.9,40.2,4.6 +2021-07-25 07:15:00,76.7,44.9,3.9 +2021-07-25 07:30:00,76.8,46.7,3.1 +2021-07-25 07:45:00,77.0,45.6,3.5 +2021-07-25 08:00:00,77.7,44.3,3.4 +2021-07-25 08:15:00,78.5,44.1,3.9 +2021-07-25 08:30:00,79.0,44.0,3.9 +2021-07-25 08:45:00,79.9,42.2,3.4 +2021-07-25 09:00:00,81.0,43.7,3.8 +2021-07-25 09:15:00,81.5,44.2,5.0 +2021-07-25 09:30:00,81.8,42.4,6.3 +2021-07-25 09:45:00,82.5,42.7,6.9 +2021-07-25 10:00:00,82.8,40.9,7.3 +2021-07-25 10:15:00,83.2,38.5,7.0 +2021-07-25 10:30:00,83.7,36.1,5.8 +2021-07-25 10:45:00,84.4,35.2,5.2 +2021-07-25 11:00:00,86.0,31.6,4.8 +2021-07-25 11:15:00,86.5,29.0,5.2 +2021-07-25 11:30:00,87.6,26.1,6.2 +2021-07-25 11:45:00,87.9,26.2,6.5 +2021-07-25 12:00:00,88.0,25.9,6.0 +2021-07-25 12:15:00,88.5,27.0,5.7 +2021-07-25 12:30:00,89.2,25.6,5.0 +2021-07-25 12:45:00,89.7,24.0,4.6 +2021-07-25 13:00:00,90.4,23.0,4.7 +2021-07-25 13:15:00,91.6,21.7,5.2 +2021-07-25 13:30:00,91.5,20.5,5.8 +2021-07-25 13:45:00,91.7,21.1,5.5 +2021-07-25 14:00:00,93.4,20.4,4.9 +2021-07-25 14:15:00,94.3,18.2,4.4 +2021-07-25 14:30:00,93.4,18.4,4.7 +2021-07-25 14:45:00,94.4,17.4,4.0 +2021-07-25 15:00:00,94.6,17.8,4.3 +2021-07-25 15:15:00,93.9,19.1,5.4 +2021-07-25 15:30:00,93.4,18.9,6.5 +2021-07-25 15:45:00,93.6,18.1,5.2 +2021-07-25 16:00:00,93.8,18.3,4.7 +2021-07-25 16:15:00,93.9,17.9,4.5 +2021-07-25 16:30:00,94.0,16.4,4.7 
+2021-07-25 16:45:00,94.0,16.7,4.3 +2021-07-25 17:00:00,94.0,16.9,4.2 +2021-07-25 17:15:00,94.2,16.7,3.2 +2021-07-25 17:30:00,94.3,16.8,3.7 +2021-07-25 17:45:00,93.7,18.9,3.8 +2021-07-25 18:00:00,93.7,18.3,3.2 +2021-07-25 18:15:00,93.3,20.6,3.0 +2021-07-25 18:30:00,91.7,26.2,3.5 +2021-07-25 18:45:00,90.5,26.8,2.6 +2021-07-25 19:00:00,88.4,28.1,3.0 +2021-07-25 19:15:00,85.3,30.7,4.3 +2021-07-25 19:30:00,83.4,31.0,4.7 +2021-07-25 19:45:00,82.0,33.8,4.6 +2021-07-25 20:00:00,80.7,36.4,3.6 +2021-07-25 20:15:00,78.6,39.3,4.4 +2021-07-25 20:30:00,80.6,31.3,4.7 +2021-07-25 20:45:00,79.1,37.6,4.8 +2021-07-25 21:00:00,80.4,30.2,5.7 +2021-07-25 21:15:00,82.8,24.7,6.5 +2021-07-25 21:30:00,82.2,24.7,6.8 +2021-07-25 21:45:00,81.6,25.2,6.7 +2021-07-25 22:00:00,80.8,26.2,6.9 +2021-07-25 22:15:00,80.2,27.3,6.9 +2021-07-25 22:30:00,79.8,28.0,6.9 +2021-07-25 22:45:00,79.3,28.5,6.8 \ No newline at end of file diff --git a/src/vibe_notebook/vibe_notebook/deepmc/__init__.py b/src/vibe_notebook/vibe_notebook/deepmc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/vibe_notebook/vibe_notebook/deepmc/forecast.py b/src/vibe_notebook/vibe_notebook/deepmc/forecast.py new file mode 100644 index 00000000..993a6617 --- /dev/null +++ b/src/vibe_notebook/vibe_notebook/deepmc/forecast.py @@ -0,0 +1,164 @@ +from datetime import datetime, timedelta +from typing import Any, Dict, List, Tuple, cast + +import numpy as np +import pandas as pd +from IPython.display import clear_output +from shapely.geometry import Point + +from vibe_core.client import FarmvibesAiClient, get_default_vibe_client +from vibe_core.datamodel import RunConfig, RunConfigUser, SpatioTemporalJson + + +class Forecast: + def __init__( + self, + workflow_name: str, + geometry: Point, + time_range: Tuple[datetime, datetime], + parameters: List[Dict[str, str]], + date_column: str = "date", + ): + self.client: FarmvibesAiClient = get_default_vibe_client() + self.workflow_name = workflow_name + self.geometry = 
geometry + self.parameters = parameters + self.time_range = time_range + self.date_column = date_column + + def submit_download_request(self): + """ + Submit request to worker to download forecast data + """ + run_metadata_list = [] + runs = [] + for parameter in self.parameters: + run_name = f"forecast_{parameter['weather_type']}" + run = self.client.run( + workflow=self.workflow_name, + name=run_name, + geometry=self.geometry, + time_range=self.time_range, + parameters=parameter, + ) + + run_metadata_list.append( + { + "id": run.id, + "weather_type": parameter["weather_type"], + } + ) + runs.append(run) + + self.client.monitor(runs, 5) + + return run_metadata_list + + def get_run_status(self, run_list: List[Dict[str, str]]): + clear_output(wait=True) + out = [] + for run_item in run_list: + o = self.client.describe_run(run_item["id"]) + print(f"Execution status for {run_item['weather_type']}: {o.details.status}") + + if o.details.status == "done": + out.append(o) + else: + raise Exception( + f"Execution status for {run_item['weather_type']}: {o.details.status}" + ) + + return out + + def get_all_assets(self, details: RunConfigUser): + asset_files = [] + output = details.output["weather_forecast"] + record: Dict[str, Any] + for record in cast(List[Dict[str, Any]], output): + for value in record["assets"].values(): + asset_files.append(value["href"]) + df_assets = [pd.read_csv(f, index_col=False) for f in asset_files] + df_out = pd.concat(df_assets) + df_out = self.clean_forecast_data(forecast_df=df_out, run_details=details) + return df_out + + def get_downloaded_data(self, run_list: List[Dict[str, str]], offset_hours: int = 0): + """ + check the download status. 
If status is done, fetch the downloaded data + """ + forecast_dataset = pd.DataFrame() + out = self.get_run_status(run_list) + for detail in out: + df = self.get_all_assets(detail) + + # Offset from UTC to specified timezone + df.index = df.index + pd.offsets.Hour(offset_hours) + + if not df.empty: + forecast_dataset = pd.concat([forecast_dataset, df], axis=1) + + return forecast_dataset + + def clean_forecast_data( + self, + forecast_df: pd.DataFrame, + run_details: RunConfig, + ): + df = forecast_df[self.date_column] + assert isinstance(run_details.user_input, SpatioTemporalJson) + start_date: datetime = run_details.user_input.start_date + end_date: datetime = run_details.user_input.end_date + + # derive forecast data + forecast_df.drop(columns=[self.date_column], inplace=True) + a = forecast_df.values.tolist() + o = pd.DataFrame([a]) + o = o.T + + df_date = pd.DataFrame( + data=pd.date_range(start_date, end_date + timedelta(days=1), freq="h"), + columns=[self.date_column], + ) + + # derive hours + hours = [f"{str(i)}:00:00" for i in range(24)] + list_hours = [hours for _ in range(forecast_df.shape[0])] + + assert run_details.parameters is not None, "Parameters are not defined" + # transform forecast data with date and time + df = pd.DataFrame( + data={ + self.date_column: df.values, + "time": list_hours, + run_details.parameters["weather_type"]: o[0], + } + ) + df = df.explode(column=["time", run_details.parameters["weather_type"]]) + df[self.date_column] = df[self.date_column].astype(str) + " " + df["time"] + df[self.date_column] = pd.to_datetime(df[self.date_column].values) + + df.drop(columns=["time"], inplace=True) + df = pd.merge(df_date, df, how="left", left_on=self.date_column, right_on=self.date_column) + + df.reset_index() + df.set_index(self.date_column, inplace=True) + df.sort_index(ascending=True, inplace=True) + df[run_details.parameters["weather_type"]] = df[ + run_details.parameters["weather_type"] + ].values.astype(np.float32) + + # rename 
columns with suffix forecast + df.rename( + columns={ + run_details.parameters[ + "weather_type" + ]: f"{run_details.parameters['weather_type']}_forecast" + }, + inplace=True, + ) + + # interpolate to derive missing data + df = df.interpolate(method="from_derivatives") + assert df is not None, "Interpolation deleted all data" + df = df.dropna() + return df diff --git a/notebooks/deepmc/notebook_lib/prediction.py b/src/vibe_notebook/vibe_notebook/deepmc/prediction.py similarity index 90% rename from notebooks/deepmc/notebook_lib/prediction.py rename to src/vibe_notebook/vibe_notebook/deepmc/prediction.py index fad17778..eb72cd4a 100644 --- a/notebooks/deepmc/notebook_lib/prediction.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/prediction.py @@ -6,9 +6,9 @@ import numpy as np import onnxruntime import pandas as pd -from numpy._typing import NDArray +from numpy.typing import NDArray -from .preprocess import Preprocess +from vibe_notebook.deepmc.preprocess import Preprocess MODEL_SUFFIX = "deepmc." 
@@ -167,7 +167,7 @@ def run_individual_predict( relevant=self.relevant, ) - test_X = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict) + test_X, _, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict) time_arr = [] post_yhat = np.empty([1, self.ts_lookahead, self.ts_lookahead]) for idx in range(0, self.total_models): @@ -251,7 +251,7 @@ def run_individual_predict_historical( ) inshape = self.total_models - test_X = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict) + test_X, _, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict) post_yhat = np.empty([test_X[0].shape[0] + 1 - inshape, inshape, self.total_models]) for idx in range(0, self.total_models): out_x = self.predict(path=self.onnx_file, predict=predict, model_idx=idx, inputs=test_X) @@ -279,3 +279,24 @@ def run_individual_predict_historical( yhat_final = output_scaler.inverse_transform(np.expand_dims(yhat_final, axis=1))[:, 0] df_predict = pd.DataFrame(data=list(zip(df_out, yhat_final)), columns=["date", predict]) return df_predict + + def deepmc_preprocess(self, df_in: pd.DataFrame, predict: str): + with open(self.data_export_path, "rb") as f: + train_scaler, output_scaler = pickle.load(f)[4:6] + + preprocess = Preprocess( + train_scaler=train_scaler, + output_scaler=output_scaler, + is_training=False, + ts_lookahead=self.ts_lookahead, + ts_lookback=self.ts_lookback, + chunk_size=self.chunk_size, + wavelet=self.wavelet, + mode=self.mode, + level=self.level, + relevant=self.relevant, + ) + + test_x, test_x_dates, _ = preprocess.wavelet_transform_predict(df_in=df_in, predict=predict) + + return test_x, test_x_dates, train_scaler, output_scaler diff --git a/notebooks/deepmc/notebook_lib/preprocess.py b/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py similarity index 64% rename from notebooks/deepmc/notebook_lib/preprocess.py rename to src/vibe_notebook/vibe_notebook/deepmc/preprocess.py index c8d81b93..a6a28113 100644 --- 
a/notebooks/deepmc/notebook_lib/preprocess.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py @@ -1,10 +1,11 @@ +from datetime import timedelta from math import ceil -from typing import Any, Optional, Tuple +from typing import Any, List, Optional, Tuple -from numpy._typing import NDArray import numpy as np import pandas as pd import pywt +from numpy.typing import NDArray from sklearn.preprocessing import StandardScaler @@ -35,11 +36,15 @@ def __init__( self.is_validation = is_validation self.relevant = relevant - def wavelet_transform_predict(self, df_in: pd.DataFrame, predict: str) -> NDArray[Any]: + def wavelet_transform_predict( + self, df_in: pd.DataFrame, predict: str + ) -> Tuple[NDArray[Any], List[Any], List[Any]]: i = 1 start = i end = start t_test_X = [] + t_x_dates = [] + t_y_dates = [] test_df = pd.DataFrame( self.train_scaler.transform(df_in), columns=df_in.columns, index=df_in.index @@ -52,9 +57,13 @@ def wavelet_transform_predict(self, df_in: pd.DataFrame, predict: str) -> NDArra i = i + 1 chunkdataDF = test_df.iloc[start:end] - test_uX, _ = self.convert_df_wavelet_input(data_df=chunkdataDF, predict=predict) + test_uX, _, test_x_dates, test_y_dates = self.convert_df_wavelet_input( + data_df=chunkdataDF, predict=predict + ) t_test_X.append(test_uX) + t_x_dates.append(test_x_dates) + t_y_dates.append(test_y_dates) test_X = t_test_X[0].copy() @@ -62,30 +71,51 @@ def wavelet_transform_predict(self, df_in: pd.DataFrame, predict: str) -> NDArra for j in range(len(t_test_X[i])): test_X[j] = np.append(test_X[j], t_test_X[i][j], axis=0) - return test_X + return test_X, t_x_dates, t_y_dates def wavelet_transform_train( self, train_df: pd.DataFrame, test_df: pd.DataFrame, out_feature: str ) -> Tuple[NDArray[Any], ...]: - t_train_X, t_train_y = self.prepare_wavelet_data(train_df, out_feature=out_feature) + t_train_X, t_train_y, t_train_X_dates, t_train_y_dates = self.prepare_wavelet_data( + train_df, out_feature=out_feature + ) - t_test_X, t_test_y = 
self.prepare_wavelet_data(test_df, out_feature=out_feature) + t_test_X, t_test_y, t_test_X_dates, t_test_y_dates = self.prepare_wavelet_data( + test_df, out_feature=out_feature + ) train_X = t_train_X[0].copy() train_y = t_train_y[0].copy() - for i in range(1, len(t_train_X)): + train_dates_X = t_train_X_dates[0][0].copy() + train_dates_y = t_train_y_dates[0].copy() + for i in range(len(t_train_X)): train_y = np.append(train_y, t_train_y[i], axis=0) + train_dates_X = np.append(train_dates_X, t_train_X_dates[i][0], axis=0) + train_dates_y = np.append(train_dates_y, t_train_y_dates[i], axis=0) for j in range(len(t_train_X[i])): train_X[j] = np.append(train_X[j], t_train_X[i][j], axis=0) test_X = t_test_X[0].copy() test_y = t_test_y[0].copy() + test_dates_X = t_test_X_dates[0][0].copy() + test_dates_y = t_test_y_dates[0].copy() for i in range(1, len(t_test_X)): test_y = np.append(test_y, t_test_y[i], axis=0) + test_dates_X = np.append(test_dates_X, t_test_X_dates[i][0], axis=0) + test_dates_y = np.append(test_dates_y, t_test_y_dates[i], axis=0) for j in range(len(t_test_X[i])): test_X[j] = np.append(test_X[j], t_test_X[i][j], axis=0) - return train_X, train_y, test_X, test_y + return ( + train_X, + train_y, + test_X, + test_y, + train_dates_X, + train_dates_y, + test_dates_X, + test_dates_y, + ) def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str): i = 0 @@ -93,6 +123,8 @@ def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str): end = start t_data_x = [] t_data_y = [] + t_dates_x = [] + t_dates_y = [] while end < data_df.shape[0]: start = i @@ -100,14 +132,16 @@ def prepare_wavelet_data(self, data_df: pd.DataFrame, out_feature: str): i = i + 1 o_data_df = data_df.iloc[start:end] - data_ux, data_uy = self.convert_df_wavelet_input( + data_ux, data_uy, data_ux_dates, data_uy_dates = self.convert_df_wavelet_input( o_data_df, predict=out_feature, ) t_data_x.append(data_ux) t_data_y.append(data_uy) + t_dates_x.append(data_ux_dates) + 
t_dates_y.append(data_uy_dates) - return t_data_x, t_data_y + return t_data_x, t_data_y, t_dates_x, t_dates_y def dl_preprocess_data( self, @@ -115,7 +149,7 @@ def dl_preprocess_data( predict: str, per_split: float = 0.8, training: bool = False, - ) -> Tuple[NDArray, Optional[NDArray], Optional[NDArray], Optional[NDArray]]: # type: ignore + ) -> Tuple[NDArray, Optional[NDArray], Optional[NDArray], Optional[NDArray], Optional[NDArray]]: # type: ignore """ merge chunk of data as single entity Args: @@ -140,7 +174,7 @@ def dl_preprocess_data( label_data = label_df.values # label_data = label_df.values - X, y = list(), list() + X, y, dates = list(), list(), list() in_start = 0 # step over the entire history one time step at a time @@ -153,30 +187,37 @@ def dl_preprocess_data( if out_end <= len(data): X.append(data[in_start:in_end, :]) y.append(label_data[in_end:out_end, :]) + dates.append(df.index[in_end:out_end].strftime("%Y-%m-%d %H:%M:%S").values) # move along one time step in_start += 1 X = np.array(X) y = np.array(y) + dates = np.array(dates) if self.is_validation is True: n_train_split = ceil(len(data) * per_split) train_X, train_y = X[:n_train_split, :, :], y[:n_train_split, :, :] test_X, test_y = X[n_train_split:, :], y[n_train_split:, :] - return train_X, train_y, test_X, test_y + return train_X, train_y, test_X, test_y, dates else: - return X, y, None, None + return X, y, None, None, dates else: - X = list() + X, dates = list(), list() in_start = 0 for _ in range(len(data) - n_in + 1): in_end = in_start + n_in if in_end <= len(data): X.append(data[in_start:in_end, :]) + # shift dates by lookahead to match it with the y + dates.append( + [t + timedelta(hours=self.ts_lookback) for t in df.index[in_start:in_end]] + ) in_start += 1 X = np.array(X) - return X, None, None, None + dates = np.array(dates) + return X, None, None, None, dates def convert_df_wavelet_input(self, data_df: pd.DataFrame, predict: str): if self.relevant: @@ -188,59 +229,66 @@ def 
convert_df_wavelet_input_not_relevant(self, data_df: pd.DataFrame, predict: level = self.level rd = list() N = data_df.shape[0] - test_X = list() + test_X, test_X_dates, test_y_dates, test_y = list(), list(), list(), list() if self.is_training: - test_y = self.dl_preprocess_data( + (_, test_y, _, _, test_y_dates) = self.dl_preprocess_data( data_df.iloc[-self.ts_lookback - self.ts_lookahead :], predict=predict, training=self.is_training, - )[1] + ) assert test_y is not None test_y = test_y[[-1], :, :] + dates = test_y_dates[[-1], :] data_df = data_df.iloc[: -self.ts_lookahead] - else: - test_y = [] wp5 = pywt.wavedec(data=data_df[predict], wavelet=self.wavelet, mode=self.mode, level=level) N = data_df.shape[0] for i in range(1, level + 1): rd.append(pywt.waverec(wp5[:-i] + [None] * i, wavelet=self.wavelet, mode=self.mode)[:N]) - t_test_X = self.dl_preprocess_data(data_df.iloc[-self.ts_lookback :], predict=predict)[0] + (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data( + data_df.iloc[-self.ts_lookback :], predict=predict + ) test_X.append(t_test_X[[-1], :, :]) + test_X_dates.append(t_test_X_dates[[-1], :]) wpt_df = data_df[[]].copy() for i in range(0, level): wpt_df[predict] = rd[i][:] - t_test_X = self.dl_preprocess_data(wpt_df.iloc[-self.ts_lookback :], predict=predict)[0] + (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data( + wpt_df.iloc[-self.ts_lookback :], predict=predict + ) test_X.append(t_test_X[[-1], :, :]) + test_X_dates.append(t_test_X_dates) - return test_X, test_y + return test_X, test_y, test_X_dates, test_y_dates def convert_df_wavelet_input_relevant(self, data_df: pd.DataFrame, predict: str): rd = list() test_X = list() + test_X, test_X_dates, test_y_dates, test_y = list(), list(), list(), list() if self.is_training: - test_y = self.dl_preprocess_data( + (_, test_y, _, _, test_y_dates) = self.dl_preprocess_data( data_df.iloc[-self.ts_lookback - self.ts_lookahead :], predict=predict, training=self.is_training, - )[1] + ) 
assert test_y is not None test_y = test_y[[-1], :, :] - else: - test_y = [] + test_y_dates = test_y_dates[[-1], :] data_df = data_df.iloc[: -self.ts_lookahead] - t_test_X = self.dl_preprocess_data(data_df.iloc[-self.ts_lookback :], predict=predict)[0] + (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data( + data_df.iloc[-self.ts_lookback :], predict=predict + ) data = data_df[predict] data = data.append(data_df[predict + "_forecast"].iloc[-self.ts_lookback :]).values @@ -253,13 +301,17 @@ def convert_df_wavelet_input_relevant(self, data_df: pd.DataFrame, predict: str) ) test_X.append(t_test_X[[-1], :, :]) + test_X_dates.append(t_test_X_dates[[-1], :]) wpt_df = data_df[[]].copy() for i in range(0, self.level): wpt_df[predict] = rd[i] - t_test_X = self.dl_preprocess_data(wpt_df.iloc[-self.ts_lookback :], predict=predict)[0] + (t_test_X, _, _, _, t_test_X_dates) = self.dl_preprocess_data( + wpt_df.iloc[-self.ts_lookback :], predict=predict + ) test_X.append(t_test_X[[-1], :, :]) + test_X_dates.append(t_test_X_dates) - return test_X, test_y + return test_X, test_y, test_X_dates, test_y_dates diff --git a/src/vibe_notebook/vibe_notebook/deepmc/utils.py b/src/vibe_notebook/vibe_notebook/deepmc/utils.py new file mode 100644 index 00000000..ffc2a2e2 --- /dev/null +++ b/src/vibe_notebook/vibe_notebook/deepmc/utils.py @@ -0,0 +1,200 @@ +from datetime import datetime, timedelta +from typing import Any, Dict, List + +import numpy as np +import pandas as pd +from numpy._typing import NDArray +from pandas.tseries.offsets import DateOffset +from sklearn.metrics import mean_absolute_error, mean_squared_error +from sklearn.preprocessing import StandardScaler + + +def get_csv_data( + path: str, + date_attribute: str = "date", + columns_rename: Dict[str, str] = {}, + frequency: str = "60min", + interpolate: bool = True, + fill_na: bool = True, +): + """ + Read data from CSV file using Pandas python package. 
+ """ + + data_df = pd.read_csv(path) + data_df[date_attribute] = pd.to_datetime(data_df[date_attribute]) + + if columns_rename: + data_df.rename(columns=columns_rename, inplace=True) + + # apply index on date + data_df.reset_index(drop=True, inplace=True) + data_df.set_index(date_attribute, inplace=True) + data_df.sort_index(ascending=True, inplace=True) + + if interpolate: + # interpolate to derive missing data + data_df = data_df.interpolate(method="from_derivatives") + assert data_df is not None, "Interpolate deleted all data" + data_df = data_df.dropna() + + if fill_na: + # Group rows by frequency, requires date attribute indexed to execute this + data_df = data_df.fillna(method="ffill") # type: ignore + data_df = data_df.fillna(method="bfill") + data_df = data_df.groupby(pd.Grouper(freq=frequency)).mean() + data_df = data_df.fillna(method="ffill") + data_df = data_df.fillna(method="bfill") + else: + data_df = data_df.groupby(pd.Grouper(freq=frequency)).mean() + + return data_df + + +def hour_round(t: datetime): + # Rounds to nearest hour by adding a timedelta hour if minute >= 30 + return t.replace(second=0, microsecond=0, minute=0, hour=t.hour) + timedelta( + hours=t.minute // 30 + ) + + +def get_split_scaled_data(data: pd.DataFrame, out_feature: str, split_ratio: float = 0.92): + split = int(split_ratio * data.shape[0]) + + train_data = data.iloc[:split] + test_data = data.iloc[split:] + + output_scaler = StandardScaler() + output_scaler.fit_transform(np.expand_dims(data[out_feature].values, axis=1)) # type: ignore + + train_scaler = StandardScaler() + train_scale_df = pd.DataFrame( + train_scaler.fit_transform(train_data), + columns=train_data.columns, + index=train_data.index, + ) + test_scale_df = pd.DataFrame( + train_scaler.transform(test_data), + columns=test_data.columns, + index=test_data.index, + ) + + return train_scaler, output_scaler, train_scale_df, test_scale_df + + +def shift_index(ds_df: pd.DataFrame, freq_minutes: int, num_indices: int, 
dateColumn: str = "date"): + ds_df[dateColumn] = ds_df.index.shift(-num_indices, freq=DateOffset(minutes=freq_minutes)) + ds_df = ds_df.reset_index(drop=True) + ds_df = ds_df.set_index(dateColumn) + return ds_df + + +def clean_relevant_data( + actual_df: pd.DataFrame, + forecast_df: pd.DataFrame, + out_variables: List[str], + freq_hours: int, + num_of_indices: int, +): + base_data_df = actual_df.copy() + current_ws_df = forecast_df.add_suffix("Current") + base_data_df = base_data_df.join(current_ws_df) + shift_forecast_df = shift_index(forecast_df, freq_hours * 60, num_of_indices) + base_data_df = base_data_df.join(shift_forecast_df) + + base_data_df = base_data_df[out_variables] + base_data_df = base_data_df.interpolate(method="from_derivatives") + assert base_data_df is not None, "Interpolate deleted all data" + base_data_df = base_data_df.dropna() + return base_data_df + + +def smooth(y: List[float], box_pts: int): + box = np.ones(box_pts) / box_pts + y_smooth = np.convolve(y, box, mode="same") + return y_smooth + + +def clean_relevant_data_using_hrrr( + actual_df: pd.DataFrame, + forecast_df: pd.DataFrame, + out_variables: List[str], + freq_hours: int, + num_of_indices: int, + start_date: datetime, + end_date: datetime, +): + forecast_df = forecast_df.loc[ + (forecast_df.index >= start_date) & (forecast_df.index <= end_date) + ] + actual_df = actual_df.loc[(actual_df.index >= start_date) & (actual_df.index <= end_date)] + + for col in actual_df.columns: + sub_df = actual_df[actual_df[col].isna()] + if col + "_forecast" in forecast_df.columns: + actual_df.loc[actual_df.index.isin(sub_df.index.values), col] = forecast_df[ + forecast_df.index.isin(sub_df.index.values) + ][col + "_forecast"] + + base_data_df = actual_df.copy() + current_ws_df = forecast_df.add_suffix("Current") + base_data_df = base_data_df.join(current_ws_df) + shift_forecast_df = shift_index(forecast_df, freq_hours * 60, num_of_indices) + base_data_df = base_data_df.join(shift_forecast_df) + + 
base_data_df = base_data_df[out_variables] + base_data_df = base_data_df.interpolate(method="from_derivatives") + assert base_data_df is not None, "Interpolate deleted all data" + base_data_df = base_data_df.dropna() + return base_data_df + + +def calculate_KPI(y: NDArray[Any], yhat: NDArray[Any]): + mae = float(mean_absolute_error(y, yhat)) + rmse = float(mean_squared_error(y, yhat, squared=False)) + print(f"RMSE: {round(rmse, 2)}") + print(f"MAE: {round(mae, 2)}") + print(f"MAE%: {round(100*sum(abs(y-yhat))/sum(y),2)}%") + + +def convert_forecast_data(data: pd.DataFrame): + # Temperature + # convert kelvin to celsius + # convert celsius to Fahrenheit + data["temperature_forecast"] = data["temperature_forecast"].apply( + lambda x: ((x - 273.15) * 9 / 5) + 32 + ) + + # wind_speed + # multiplying with 2.23 to convert wind speed from m/sec to mph + data["wind_speed_forecast"] = data.apply( + lambda x: np.sqrt( + np.square(x["u-component_forecast"]) + np.square(x["v-component_forecast"]) + ) + * 2.23, + axis=1, + ) + data.drop(columns=["u-component_forecast", "v-component_forecast"], inplace=True) + return data + + +def transform_to_array_3D(data: NDArray[Any], inference_hours: int = 24) -> NDArray[Any]: + X = transform_to_array(data, inference_hours) + X = X.reshape(X.shape[0], 1, X.shape[1]) + return X + + +def transform_to_array(data: NDArray[Any], inference_hours: int = 24) -> NDArray[Any]: + data = np.array(data) + X = [] + for in_start in range(len(data)): + in_end = in_start + inference_hours + if in_end <= (len(data)): + X.append(data[in_start:in_end]) + else: + break + + X = np.array(X) + # skip rows not in loop + X = X[: data.shape[0] - inference_hours] + return X From 8be7698059fb03cf6dc47d606853e2913f4ab479 Mon Sep 17 00:00:00 2001 From: Rafael Padilha Date: Wed, 31 Jul 2024 17:39:36 +0000 Subject: [PATCH 04/13] Removing @SECRETS(..., pc-sub-key) from notebooks (#187) Fix for #173. Planetary Computer does not uses API key anymore. 
Some of our notebooks are failing because of that (and because the secret has not been added to the cluster). --- notebooks/forest/download_alos_forest_map.ipynb | 1 - notebooks/forest/forest_change_detection.ipynb | 1 - notebooks/sentinel/sentinel_spaceeye.ipynb | 3 --- 3 files changed, 5 deletions(-) diff --git a/notebooks/forest/download_alos_forest_map.ipynb b/notebooks/forest/download_alos_forest_map.ipynb index 687cd8c7..f3444d0a 100644 --- a/notebooks/forest/download_alos_forest_map.ipynb +++ b/notebooks/forest/download_alos_forest_map.ipynb @@ -378,7 +378,6 @@ " \"Download ALOS Forest Map\",\n", " geometry=geom,\n", " time_range=time_range,\n", - " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")\n", "run.monitor()" ] diff --git a/notebooks/forest/forest_change_detection.ipynb b/notebooks/forest/forest_change_detection.ipynb index b21c78a7..1876d2af 100644 --- a/notebooks/forest/forest_change_detection.ipynb +++ b/notebooks/forest/forest_change_detection.ipynb @@ -480,7 +480,6 @@ " geometry=geom,\n", " time_range=time_range,\n", " parameters={\n", - " \"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\",\n", " \"from_values\": [4, 3, 0, 2, 1],\n", " \"to_values\": [0, 0, 0, 1, 2],\n", " },\n", diff --git a/notebooks/sentinel/sentinel_spaceeye.ipynb b/notebooks/sentinel/sentinel_spaceeye.ipynb index 1d364ac4..e9c1e2a6 100755 --- a/notebooks/sentinel/sentinel_spaceeye.ipynb +++ b/notebooks/sentinel/sentinel_spaceeye.ipynb @@ -137,7 +137,6 @@ " f\"Amazon {time_range[0].year}\",\n", " geometry=geom,\n", " time_range=time_range,\n", - " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")" ] }, @@ -233,7 +232,6 @@ " f\"Amazon {time_range[0].year}\",\n", " geometry=geom,\n", " time_range=time_range,\n", - " parameters={\"pc_key\": \"@SECRET(eywa-secrets, pc-sub-key)\"},\n", ")\n", "run2.monitor()" ] @@ -384,7 +382,6 @@ " f\"Amazon {tr[0].year}\",\n", " geometry=geom,\n", " time_range=tr,\n", - " parameters={\"pc_key\": 
\"@SECRET(eywa-secrets, pc-sub-key)\"},\n", " )\n", " for tr in time_ranges\n", "]\n", From 2fa6555de50de5b952e181cfb1c226b124981d11 Mon Sep 17 00:00:00 2001 From: Rafael Soares Padilha Date: Thu, 1 Aug 2024 14:38:23 -0300 Subject: [PATCH 05/13] SpaceEye - Adjusting reading windows for chips that fall close to the raster's boarder (#183) In the SpaceEye workflow, if the input geometry yields a window that is smaller than the chip size, we adjust it by splitting the difference between the chip's and window's width (height) half way to the right/left (top/bottom) sides of the original window. However if the input geometry falls close to the border of the raster, we might not be able to completely increase the window in one of the sides and end up with a window smaller than the chip size. Because of that, the workflow fails with "_dim_size cannot be smaller than chip_size. Please consider reducing the step/chip size or increasing the input geometry._" This PR updates the `_adjust_roi_window` method, so the window is adjusted from both sides until reaching the chip size or reaching the border of the raster. This fixes #180. --- src/vibe_lib/vibe_lib/spaceeye/dataset.py | 42 ++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/vibe_lib/vibe_lib/spaceeye/dataset.py b/src/vibe_lib/vibe_lib/spaceeye/dataset.py index 88e174d7..1b38bc17 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/dataset.py +++ b/src/vibe_lib/vibe_lib/spaceeye/dataset.py @@ -160,6 +160,37 @@ def get_write_windows( ) +def adjust_dim( + window_dim: float, window_ranges: Tuple[float, float], chip_dim: float, raster_bounds: float +) -> Tuple[float, float]: + """ + Adjust a window's dimension (width or height) to make sure the window reaches the chip size + while still within the raster bounds. + + Args: + chip_dim: The chip dimension (width or height). + window_dim: The window dimension (width or height). + window_ranges: The window ranges (start, end). 
+ raster_bounds: The raster dimension (width or height). + + Returns: + The adjusted window ranges. + """ + diff = chip_dim - window_dim + offset = diff // 2 + + offset_low = offset if window_ranges[0] - offset >= 0 else window_ranges[0] + offset_high = diff - offset_low + if offset_high + window_ranges[1] > raster_bounds: + offset_high = raster_bounds - window_ranges[1] + offset_low = diff - offset_high + + min_dim = max(window_ranges[0] - offset_low, 0) + max_dim = window_ranges[1] + offset_high + + return min_dim, max_dim + + class SpaceEyeReader(Dataset[DatasetReturnType]): """Dataset that lazily reads chips from sentinel 1 and 2 rasters. The dataset computes the necessary chips to cover the whole RoI according to @@ -266,16 +297,11 @@ def _adjust_roi_window(self, window: Window) -> Window: f"RoI has dimensions {window.width, window.height} and chip size is {self.chip_size}," f" adjusting to {width, height}" ) - diff_w = width - window.width - dw = diff_w // 2 - diff_h = height - window.height - dh = diff_h // 2 hs, ws = window.toranges() - min_w = max(ws[0] - dw, 0) - max_w = min(ws[1] + diff_w - dw, self.raster_width) - min_h = max(hs[0] - dh, 0) - max_h = min(hs[1] + diff_h - dh, self.raster_height) + + min_h, max_h = adjust_dim(window.height, hs, height, self.raster_height) + min_w, max_w = adjust_dim(window.width, ws, width, self.raster_width) new_win = Window.from_slices((min_h, max_h), (min_w, max_w)) LOGGER.info(f"Adjusting from {window} to {new_win}") From fdcf4dc758988354f58d5ad4322f8ca402011a2a Mon Sep 17 00:00:00 2001 From: Rafael Soares Padilha Date: Thu, 1 Aug 2024 14:40:43 -0300 Subject: [PATCH 06/13] SAM - Normalization and ChipWindow (#184) This PR replaces the `ChipWindow` named tuple with a tuple type alias. This fixes some serialization/deserialization errors that caused the workflow to break. Additionally, this PR modifies how we scale and offset the raster values during normalization before SAM image encoder. 
After applying the raster's scale and offset normalization on the RGB bands, we now clip the values to the range [0,1] before multiplying them by 255. The lack of clipping operation was leading to inconsistencies in the segmentation masks outputs. --- .../automatic_segmentation.yaml | 2 +- ops/segment_anything/sam_inference.py | 2 +- .../combine_sam_masks.py | 8 ++++---- .../test_combine_sam_masks.py | 4 ++-- src/vibe_core/vibe_core/data/core_types.py | 17 ++--------------- src/vibe_lib/vibe_lib/segment_anything.py | 18 +++++++++++++----- 6 files changed, 23 insertions(+), 28 deletions(-) diff --git a/ops/segment_anything/automatic_segmentation.yaml b/ops/segment_anything/automatic_segmentation.yaml index 567706bb..58ad0f32 100644 --- a/ops/segment_anything/automatic_segmentation.yaml +++ b/ops/segment_anything/automatic_segmentation.yaml @@ -41,7 +41,7 @@ description: parameters: model_type: SAM's image encoder backbone architecture, among 'vit_h', 'vit_l', or 'vit_b'. Before running the workflow, make sure the desired model has been exported to the cluster by running `scripts/export_sam_models.py`. For more information, refer to the FarmVibes.AI troubleshooting page in the documentation. band_names: Name of raster bands that should be selected to compose the 3-channel images expected by SAM. If not provided, will try to use ["R", "G", "B"]. If only a single band name is provided, will replicate it through all three channels. - band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0] or [0.0, 255.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. + band_scaling: A list of floats to scale each band by to the range of [0.0, 1.0]. If not provided, will default to the raster scaling parameter. If a list with a single value is provided, will use it for all three bands. band_offset: A list of floats to offset each band by. 
If not provided, will default to the raster offset value. If a list with a single value is provided, will use it for all three bands. spatial_overlap: Percentage of spatial overlap between chips in the range of [0.0, 1.0). points_per_side: The number of points to be sampled along one side of the chip to be prompts. The total number of points is points_per_side**2. diff --git a/ops/segment_anything/sam_inference.py b/ops/segment_anything/sam_inference.py index 5e833062..f6f13020 100644 --- a/ops/segment_anything/sam_inference.py +++ b/ops/segment_anything/sam_inference.py @@ -497,7 +497,7 @@ def generate_masks_from_grid( meta = cast(Dict[str, Any], write_info_list[0]["meta"]) meta.update({**INT_COMPRESSION_KWARGS}) - write_window = ChipWindow( + write_window = ( int(read_window.col_off - dataset.offset.width), int(read_window.row_off - dataset.offset.height), int(read_window.width), diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.py b/ops/segment_anything_combine_masks/combine_sam_masks.py index e041023f..e457b98d 100644 --- a/ops/segment_anything_combine_masks/combine_sam_masks.py +++ b/ops/segment_anything_combine_masks/combine_sam_masks.py @@ -12,10 +12,10 @@ def touch_chip_boundaries(bbox: BBox, chip_window: ChipWindow) -> bool: return ( - bbox[0] <= chip_window.col_offset - or bbox[1] <= chip_window.row_offset - or bbox[2] >= chip_window.col_offset + chip_window.width - or bbox[3] >= chip_window.row_offset + chip_window.height + bbox[0] <= chip_window[0] # col_offset + or bbox[1] <= chip_window[1] # row_offset + or bbox[2] >= chip_window[0] + chip_window[2] # col_offset + width + or bbox[3] >= chip_window[1] + chip_window[3] # row_offset + height ) diff --git a/ops/segment_anything_combine_masks/test_combine_sam_masks.py b/ops/segment_anything_combine_masks/test_combine_sam_masks.py index a3d26354..febd5f62 100644 --- a/ops/segment_anything_combine_masks/test_combine_sam_masks.py +++ 
b/ops/segment_anything_combine_masks/test_combine_sam_masks.py @@ -8,7 +8,7 @@ import xarray as xr from shapely import geometry as shpg -from vibe_core.data.core_types import ChipWindow, gen_guid +from vibe_core.data.core_types import gen_guid from vibe_core.data.rasters import CategoricalRaster, SamMaskRaster from vibe_dev.testing.op_tester import OpTester from vibe_lib.raster import save_raster_to_asset @@ -59,7 +59,7 @@ def create_segmented_raster( categories=["background", "foreground"], mask_score=[mask_score], mask_bbox=[tuple([float(c) for c in mask_bbox])], # type: ignore - chip_window=ChipWindow(0.0, 0.0, float(raster_size), float(raster_size)), + chip_window=(0.0, 0.0, float(raster_size), float(raster_size)), ) diff --git a/src/vibe_core/vibe_core/data/core_types.py b/src/vibe_core/vibe_core/data/core_types.py index 55271080..da6b4fa2 100644 --- a/src/vibe_core/vibe_core/data/core_types.py +++ b/src/vibe_core/vibe_core/data/core_types.py @@ -15,7 +15,6 @@ ClassVar, Dict, List, - NamedTuple, Optional, Tuple, Type, @@ -51,20 +50,8 @@ """Type alias for a time range, as a tuple of two `datetime` objects (start, end).""" -class ChipWindow(NamedTuple): - """Represent a window of a raster chip. - - Attributes: - col_offset: The column offset of the window with relation to the raster chip. - row_offset: The row offset of the window with relation to the raster chip. - width: The width of the window. - height: The height of the window. 
- """ - - col_offset: float - row_offset: float - width: float - height: float +ChipWindow = Tuple[float, float, float, float] +"""Type alias representing a raster chip window, as (col_offset, row_offset, width, height).""" def gen_guid(): diff --git a/src/vibe_lib/vibe_lib/segment_anything.py b/src/vibe_lib/vibe_lib/segment_anything.py index 648acb74..0d4267c3 100644 --- a/src/vibe_lib/vibe_lib/segment_anything.py +++ b/src/vibe_lib/vibe_lib/segment_anything.py @@ -11,12 +11,13 @@ from geopandas import GeoDataFrame from numpy.typing import NDArray from rasterio import Affine +from rasterio.windows import Window from shapely.geometry.base import BaseGeometry from torchvision.transforms.functional import resize from vibe_core.data import GeometryCollection, Raster from vibe_core.data.core_types import BBox, Point -from vibe_lib.spaceeye.chip import ChipDataset, Dims, Window +from vibe_lib.spaceeye.chip import ChipDataset, Dims LOGGER = logging.getLogger(__name__) @@ -473,7 +474,7 @@ def build_chip_preprocessing_operation( elif len(band_scaling) != len(band_names): raise ValueError(f"Expected one or three scaling parameters. Got {band_scaling}") else: - band_scaling = [raster.scale] * 3 + band_scaling = [float(raster.scale)] * 3 scale = np.array(band_scaling).reshape(1, 3, 1, 1) if band_offset: @@ -483,13 +484,20 @@ def build_chip_preprocessing_operation( elif len(band_offset) != len(band_names): raise ValueError(f"Expected one or three offset parameters. 
Got {band_offset}") else: - band_offset = [raster.offset] * 3 + band_offset = [float(raster.offset)] * 3 offset = np.array(band_offset).reshape(1, 3, 1, 1) def preprocessing_operation(chip: NDArray[Any]) -> NDArray[Any]: normalized_chip = chip[:, band_idx, :, :] * scale + offset - if np.min(normalized_chip) >= 0 and np.max(normalized_chip) <= 1: - normalized_chip = normalized_chip * 255.0 + if np.min(normalized_chip) < 0 or np.max(normalized_chip) > 1: + LOGGER.warning( + "Chip values are outside the expected range [0, 1] after scaling and offset. " + f"Found max of {np.max(normalized_chip)} and min of {np.min(normalized_chip)}." + "Will clip to [0, 1] and normalize to [0, 255]. Please, verify the band_scaling " + "and band_offset parameters of the workflow." + ) + normalized_chip = np.clip(normalized_chip, 0, 1) + normalized_chip = normalized_chip * 255.0 return normalized_chip.astype(np.float32) return preprocessing_operation From 306040886c198574459b417328c3daad822640be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20de=20Moura=20Estev=C3=A3o=20Filho?= Date: Thu, 1 Aug 2024 15:08:18 -0300 Subject: [PATCH 07/13] Add workflow to build and push container images (#182) Add workflow to build and push `dev` and `main` container images. 
--- .github/workflows/docker-build.yml | 39 ++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/docker-build.yml diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 00000000..daca6037 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,39 @@ +name: Build service images +on: + push: + branches: + - dev + - main + +permissions: + id-token: write + contents: read +jobs: + build-and-push: + runs-on: ubuntu-latest + environment: build + strategy: + matrix: + include: + - dockerfile: Dockerfile-api_orchestrator + image-name: api-orchestrator + - dockerfile: Dockerfile-worker + image-name: worker + - dockerfile: Dockerfile-cache + image-name: cache + steps: + - uses: actions/checkout@v4 + - name: 'Az CLI login' + uses: azure/login@v1 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + - name: 'Build and push image' + run: | + az acr login -n ${{ secrets.ACR_NAME }} + export VERSION_TAG=${GITHUB_REF#refs/heads/} + export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/${{ matrix.image-name }}:$VERSION_TAG + docker build . -f ./resources/docker/${{ matrix.dockerfile }} -t $IMAGE_TAG + docker push $IMAGE_TAG + From 5a4012d765884a2ecc73ecdda71b814976058f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20de=20Moura=20Estev=C3=A3o=20Filho?= Date: Thu, 1 Aug 2024 15:09:56 -0300 Subject: [PATCH 08/13] Add workflow to release to main (#188) Workflow that retags images, changes tags in the code, and opens a PR to main --------- Co-authored-by: Renato L. de F. 
Cunha --- .github/workflows/release-to-main.yml | 55 +++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/release-to-main.yml diff --git a/.github/workflows/release-to-main.yml b/.github/workflows/release-to-main.yml new file mode 100644 index 00000000..3636d74d --- /dev/null +++ b/.github/workflows/release-to-main.yml @@ -0,0 +1,55 @@ +name: Release to main +on: + workflow_dispatch: + +permissions: + id-token: write + contents: write + pull-requests: write + +jobs: + release: + runs-on: ubuntu-latest + environment: build + steps: + - uses: actions/checkout@v4 + with: + ref: dev + - name: 'Az CLI login' + uses: azure/login@v1 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + - name: Retag images + run: | + az acr login -n ${{ secrets.ACR_NAME }} + for image in api-orchestrator worker cache; do + export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/$image + export DEV_TAG=$IMAGE_TAG:dev + export MAIN_TAG=$IMAGE_TAG:${{ github.run_id }} + echo Retagging from $DEV_TAG to $MAIN_TAG + docker pull $DEV_TAG + docker tag $DEV_TAG $MAIN_TAG + docker push $MAIN_TAG + done + - name: Adjust default tag + run: | + ROOT=$(git rev-parse --show-toplevel) + CONSTANTS_MODULE="$ROOT"/src/vibe_core/vibe_core/cli/constants.py + sed -i "s|DEFAULT_IMAGE_TAG.*|DEFAULT_IMAGE_TAG = ${{ github.run_id }}|g" "$CONSTANTS_MODULE" + - name: Check modified file + run: cat src/vibe_core/vibe_core/cli/constants.py + - name: Commit changes + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git config --global user.email "farmvibesaicd@microsoft.com" + git config --global user.name "FarmVibes.AI Release Pipeline" + ROOT=$(git rev-parse --show-toplevel) + CONSTANTS_MODULE="$ROOT"/src/vibe_core/vibe_core/cli/constants.py + RELEASE_BRANCH=release-main-${{ github.run_id }} + git checkout -b $RELEASE_BRANCH + git add 
$CONSTANTS_MODULE + git commit -m "Update default tag to latest image" + git push --set-upstream origin $RELEASE_BRANCH From fb4578cc88552bafd0fee134188223561a8c7cc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20de=20Moura=20Estev=C3=A3o=20Filho?= Date: Thu, 1 Aug 2024 15:13:44 -0300 Subject: [PATCH 09/13] Add local cluster integration tests (#186) Create a cluster with local images and run some tests on it --- .github/workflows/cluster-build.yml | 105 ---------------------------- .github/workflows/lint-test.yml | 70 ++++++++++++++++++- scripts/local-k8s-diagnostics.sh | 43 ++++++++++++ 3 files changed, 112 insertions(+), 106 deletions(-) delete mode 100644 .github/workflows/cluster-build.yml create mode 100644 scripts/local-k8s-diagnostics.sh diff --git a/.github/workflows/cluster-build.yml b/.github/workflows/cluster-build.yml deleted file mode 100644 index 4b84cf20..00000000 --- a/.github/workflows/cluster-build.yml +++ /dev/null @@ -1,105 +0,0 @@ -name: Build FarmVibes.AI cluster -run-name: Cluster build and helloworld test -on: - push: - branches: - - dev - - main - pull_request: - branches: - - dev - - main - workflow_dispatch: -env: - FARMVIBES_AI_SKIP_DOCKER_FREE_SPACE_CHECK: yes -jobs: - build: - name: Build and test - runs-on: ubuntu-latest - steps: - - name: Free space before cleanup - shell: bash - run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - name: Remove unused software - shell: bash - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -fr /usr/local/lib/android - sudo rm -fr /opt/ghc - - name: Free space after cleanup - shell: bash - run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - - name: Update pip version - shell: bash - run: pip install --upgrade pip - - - name: Checkout - uses: actions/checkout@v3 - - - name: Install Python requirements - shell: bash - run: pip install --user ./src/vibe_core - - - name: Actually build cluster - shell: bash - 
run: farmvibes-ai local setup - - - name: Cluster status - shell: bash - run: | - echo "Deployments:" - kubectl get deployments - kubectl describe deployments - echo "Pods:" - kubectl get pods - echo "Events:" - kubectl get events - - - name: Free space after cluster creation - shell: bash - run: | - echo "Memory and swap:" - free -h - echo - echo "Available storage:" - df -h - echo - - - name: Run Hello World workflow - shell: bash - run: python -m vibe_core.farmvibes_ai_hello_world 300 - - - name: Install tool to view helloworld output - shell: bash - run: yes | sudo apt install caca-utils || echo - - - name: Show helloworld output - shell: bash - run: find ~/.cache/farmvibes-ai/data -type f -name '*.tif' -exec img2txt {} \; 2> /dev/null - - - name: Restart cluster - shell: bash - run: farmvibes-ai local restart - - - name: Wait a bit before submitting a new run (as documented) - shell: bash - run: sleep 90 - - - name: Run Hello World workflow again - shell: bash - run: | - rm -fr ~/.cache/farmvibes-ai/data/{assets,stac} - python -m vibe_core.farmvibes_ai_hello_world 300 diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml index 0698d743..8fac7957 100644 --- a/.github/workflows/lint-test.yml +++ b/.github/workflows/lint-test.yml @@ -126,4 +126,72 @@ jobs: ruff check --select D,D401 --ignore D105 --force-exclude --exclude src/vibe_core/vibe_core/farmvibes_ai_hello_world.py --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/*.py - name: "Check docstrings for vibe_core/data" run: | - ruff check --select D,D401 --ignore D105 --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/data/*.py \ No newline at end of file + ruff check --select D,D401 --ignore D105 --config "lint.pydocstyle.convention = 'google'" src/vibe_core/vibe_core/data/*.py + local-integration-tests: + runs-on: ubuntu-latest + steps: + - name: Free space before cleanup + shell: bash + run: | + echo "Memory and swap:" + free -h + echo + 
echo "Available storage:" + df -h + echo + - name: Remove unused software + shell: bash + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -fr /usr/local/lib/android + sudo rm -fr /opt/ghc + - name: Free space after cleanup + shell: bash + run: | + echo "Memory and swap:" + free -h + echo + echo "Available storage:" + df -h + echo + - uses: actions/checkout@v4 + with: + lfs: true + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Install packages + run: | + pip install ./src/vibe_core + pip install ./src/vibe_common + pip install ./src/vibe_agent + pip install ./src/vibe_server + pip install ./src/vibe_lib + pip install ./src/vibe_dev + - name: Actually build cluster + run: farmvibes-ai local setup --auto-confirm + - name: Cluster status before building local images + run: | + bash ./scripts/local-k8s-diagnostics.sh + - name: Build images + run: | + WAIT_AT_THE_END=1 make local + - name: Cluster status after building local images + run: | + bash ./scripts/local-k8s-diagnostics.sh + - name: Free space after cluster creation + run: | + echo "Memory and swap:" + free -h + echo + echo "Available storage:" + df -h + echo + - name: Run integration tests + run: | + pytest ./src/tests_local_cluster/ -v --junitxml=junit/test-results.xml + - name: Cluster status after running tests + if: always() + run: | + bash ./scripts/local-k8s-diagnostics.sh \ No newline at end of file diff --git a/scripts/local-k8s-diagnostics.sh b/scripts/local-k8s-diagnostics.sh new file mode 100644 index 00000000..aca46057 --- /dev/null +++ b/scripts/local-k8s-diagnostics.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +PATH=$PATH:~/.config/farmvibes-ai + +echo "kubectl location:" +which kubectl + +echo "Cluster pods:" +kubectl get pods +kubectl get pods -o yaml + +echo "Docker images:" +docker images + +echo "REST API description:" +kubectl describe deployment terravibes-rest-api + +echo "Orchestrator description:" +kubectl describe deployment 
terravibes-orchestrator + +echo "Worker description:" +kubectl describe deployment terravibes-worker + +echo "Cache description:" +kubectl describe deployment terravibes-cache + +echo "REST API logs:" +kubectl logs -l app=terravibes-rest-api --all-containers=true --tail=-1 + +echo "Orchestrator logs:" +kubectl logs -l app=terravibes-orchestrator --all-containers=true --tail=-1 + +echo "Worker logs:" +kubectl logs -l app=terravibes-worker --max-log-requests=8 --all-containers=true --tail=-1 + +echo "Cache logs:" +kubectl logs -l app=terravibes-cache --all-containers=true --tail=-1 + +echo "Data Ops logs:" +kubectl logs -l app=terravibes-data-ops --all-containers=true --tail=-1 + +echo "Kubernetes logs:" +docker ps | egrep 'k3d-farmvibes-ai-.*-0' | awk '{ print $1 }' | xargs docker logs From 84f906d1b9132eb011dcc6f36896560639c37cbe Mon Sep 17 00:00:00 2001 From: "Renato L. de F. Cunha" Date: Fri, 2 Aug 2024 12:16:29 -0300 Subject: [PATCH 10/13] Add copyright header to all source files (#190) This PR adds a copyright header and license information to all FarmVibes.AI source files. 
--- .devcontainer/post-create.sh | 3 +++ ops/admag/admag_seasonal_field_op.py | 3 +++ ops/admag/get_prescription.py | 3 +++ ops/admag/list_prescriptions.py | 3 +++ ops/admag/prescriptions.py | 3 +++ ops/admag/test_admag.py | 3 +++ ops/aggregate_statistics_timeseries/aggregate_timeseries.py | 3 +++ ops/carbon_local/test_whatif.py | 3 +++ ops/carbon_local/whatif_comet_local.py | 3 +++ ops/chunk_raster/chunk_raster.py | 3 +++ ops/clip_raster/clip_raster.py | 3 +++ ops/combine_chunks/combine_chunks.py | 3 +++ ops/compute_cloud_prob/compute_cloud_prob.py | 3 +++ ops/compute_cloud_water_mask/compute_cloud_water_mask.py | 3 +++ .../compute_conservation_practice.py | 3 +++ .../compute_evaporative_fraction.py | 3 +++ ops/compute_fcover/fcover.py | 3 +++ ops/compute_ghg_fluxes/compute_ghg_fluxes.py | 3 +++ ops/compute_ghg_fluxes/test_ghg_fluxes.py | 3 +++ ops/compute_illuminance/compute_illuminance.py | 3 +++ ops/compute_index/index.py | 3 +++ ops/compute_index/test_index.py | 3 +++ .../compute_irrigation_probability.py | 3 +++ ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py | 3 +++ ops/compute_onnx/compute_onnx.py | 3 +++ ops/compute_onnx/test_compute_onnx.py | 3 +++ ops/compute_onnx/test_compute_onnx_chunk.py | 3 +++ ops/compute_pixel_count/compute_pixel_count.py | 3 +++ ops/compute_pixel_count/test_compute_pixel_count.py | 3 +++ .../compute_raster_class_windowed_average.py | 3 +++ ops/compute_raster_cluster/compute_raster_cluster.py | 3 +++ ops/compute_raster_gradient/compute_raster_gradient.py | 3 +++ ops/compute_shadow_prob/compute_shadow_prob.py | 3 +++ ops/create_raster_sequence/create_raster_sequence.py | 3 +++ ops/datavibe_filter/datavibe_filter.py | 3 +++ ops/detect_driveway/detect_driveway.py | 3 +++ ops/detect_outliers/detect_outliers.py | 3 +++ ops/download_airbus/download_airbus.py | 3 +++ ops/download_alos/download_alos.py | 3 +++ ops/download_alos/test_download_alos.py | 3 +++ ops/download_ambient_weather/download_ambient_weather.py | 3 +++ 
ops/download_bing_basemap/download_bing_basemap.py | 3 +++ ops/download_bing_basemap/test_download_bing_basemap.py | 3 +++ ops/download_cdl_data/download_cdl.py | 3 +++ ops/download_cdl_data/download_cdl_data.py | 3 +++ ops/download_chirps/download_chirps.py | 3 +++ ops/download_climatology_lab/download_climatology_lab.py | 3 +++ ops/download_climatology_lab/test_download_climatology_lab.py | 3 +++ ops/download_dem/download_dem.py | 3 +++ ops/download_dem/test_download_dem.py | 3 +++ ops/download_era5/download_era5.py | 3 +++ .../download_esri_landuse_landcover.py | 3 +++ .../test_download_esri_landuse_landcover.py | 3 +++ ops/download_from_ref/download_from_ref.py | 3 +++ ops/download_from_smb/download_rasters_from_smb.py | 3 +++ ops/download_gedi_product/download_gedi_product.py | 3 +++ ops/download_gedi_product/test_download_gedi_product.py | 3 +++ ops/download_glad_data/download_glad.py | 3 +++ ops/download_glad_data/test_download_glad_product.py | 3 +++ ops/download_gnatsgo/download_gnatsgo_raster.py | 3 +++ ops/download_gnatsgo/test_download_gnatsgo.py | 3 +++ ops/download_hansen/download_hansen.py | 3 +++ ops/download_hansen/test_download_hansen_product.py | 3 +++ ops/download_herbie/download_herbie.py | 3 +++ ops/download_herbie/forecast_range_split.py | 3 +++ ops/download_herbie/forecast_weather.py | 3 +++ ops/download_landsat_from_pc/download_landsat_pc.py | 3 +++ ops/download_modis_sr/download_modis_sr.py | 3 +++ ops/download_modis_vegetation/download_modis_vegetation.py | 3 +++ .../test_download_modis_vegetation.py | 3 +++ ops/download_naip/download_naip.py | 3 +++ ops/download_naip/test_download_naip.py | 3 +++ ops/download_road_geometries/download_road_geometries.py | 3 +++ ops/download_sentinel1/download_sentinel1_rtc.py | 3 +++ ops/download_sentinel1/test_download_sentinel1_rtc.py | 3 +++ ops/download_sentinel1_grd/download_sentinel1_grd.py | 3 +++ ops/download_sentinel1_grd/test_download_sentinel1.py | 3 +++ 
ops/download_sentinel2_from_pc/download_s2_pc.py | 3 +++ ops/download_soilgrids/download_soilgrids.py | 3 +++ ops/download_stack_sentinel2/download_stack_s2.py | 3 +++ ops/download_usda_soils/download_usda_soils.py | 3 +++ ops/ensemble_cloud_prob/ensemble_cloud_prob.py | 3 +++ ops/estimate_canopy_cover/estimate_canopy.py | 3 +++ ops/extract_gedi_rh100/extract_gedi_rh100.py | 3 +++ ops/extract_gedi_rh100/test_extract_gedi_rh100.py | 3 +++ ops/extract_protein_sequence/extract_protein_sequence.py | 3 +++ ops/get_angles/get_angles.py | 3 +++ ops/gfs_download/gfs_download.py | 3 +++ ops/gfs_preprocess/gfs_preprocess.py | 3 +++ .../group_rasters_by_geometries.py | 3 +++ ops/group_rasters_by_time/group_rasters_by_time.py | 3 +++ ops/group_rasters_by_time/test_group_rasters_by_time.py | 3 +++ ops/group_sentinel1_orbits/group_sentinel1_orbits.py | 3 +++ ops/group_sentinel2_orbits/group_sentinel2_orbits.py | 3 +++ ops/group_tile_sequence/group_tile_sequence.py | 3 +++ .../soil_sample_heatmap_using_classification.py | 3 +++ ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py | 3 +++ .../test_soil_cluster_sample_heatmap_using_neighbors.py | 3 +++ .../test_soil_sample_heatmap_using_classification.py | 3 +++ ops/helloworld/helloworld.py | 3 +++ ops/linear_trend/linear_trend.py | 3 +++ ops/linear_trend/test_linear_trend.py | 3 +++ ops/list_airbus_products/list_airbus.py | 3 +++ ops/list_alos_products/list_alos_products.py | 3 +++ ops/list_alos_products/test_alos_list.py | 3 +++ ops/list_bing_maps/list_bing_maps.py | 3 +++ ops/list_bing_maps/test_list_bing_maps.py | 3 +++ ops/list_cdl_products/list_cdl_products.py | 3 +++ ops/list_chirps/list_chirps.py | 3 +++ ops/list_climatology_lab/list_climatology_lab.py | 3 +++ ops/list_climatology_lab/test_list_climatology_lab.py | 3 +++ ops/list_dem_products/list_dem_products.py | 3 +++ ops/list_dem_products/test_list_dem_products.py | 3 +++ ops/list_era5/list_era5.py | 3 +++ ops/list_era5/list_era5_cds.py | 3 +++ 
.../list_esri_landuse_landcover.py | 3 +++ .../test_list_esri_landuse_landcover.py | 3 +++ ops/list_gedi_products/list_gedi_products.py | 3 +++ ops/list_gedi_products/test_list_gedi_products.py | 3 +++ ops/list_glad_products/list_glad_products.py | 3 +++ ops/list_glad_products/test_glad_list.py | 3 +++ ops/list_gnatsgo_products/list_gnatsgo_products.py | 3 +++ ops/list_gnatsgo_products/test_list_gnatsgo_products.py | 3 +++ ops/list_hansen_products/list_hansen_products.py | 3 +++ ops/list_hansen_products/test_hansen_list.py | 3 +++ ops/list_herbie/list_herbie.py | 3 +++ ops/list_landsat_products_pc/list_landsat_pc.py | 3 +++ ops/list_modis_sr/list_modis_sr.py | 3 +++ ops/list_modis_vegetation/list_modis_vegetation.py | 3 +++ ops/list_modis_vegetation/test_list_modis_vegetation.py | 3 +++ ops/list_naip_products/list_naip_products.py | 3 +++ ops/list_naip_products/test_list_naip_products.py | 3 +++ ops/list_sentinel1_products/list_sentinel1_products_pc.py | 3 +++ ops/list_sentinel1_products/test_list_sentinel1.py | 3 +++ ops/list_sentinel2_products/list_s2_pc.py | 3 +++ ops/list_to_sequence/list_to_sequence.py | 3 +++ ops/list_to_sequence/test_list_to_sequence.py | 3 +++ ops/match_raster_to_ref/match_raster_to_ref.py | 3 +++ ops/merge_cloud_masks/merge_cloud_masks.py | 3 +++ ops/merge_cloud_masks/merge_cloud_masks_simple.py | 3 +++ ops/merge_geometries/merge_geometries.py | 3 +++ ops/merge_geometries/test_merge_geometries.py | 3 +++ .../merge_geometry_and_time_range.py | 3 +++ .../test_merge_geometry_and_time_range.py | 3 +++ ops/merge_rasters/merge_rasters.py | 3 +++ ops/merge_sentinel1_orbits/merge_sentinel1.py | 3 +++ ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py | 3 +++ ops/minimum_samples/find_soil_sample_locations.py | 3 +++ ops/minimum_samples/test_soil_sample_heatmap.py | 3 +++ ops/ordinal_trend_test/ordinal_trend_test.py | 3 +++ ops/ordinal_trend_test/test_ordinal_trend.py | 3 +++ ops/pair_intersecting_rasters/pair_intersecting_rasters.py | 3 +++ 
ops/price_airbus_products/price_airbus.py | 3 +++ ops/protlearn/protlearn.py | 3 +++ ops/read_grib_forecast/read_grib_forecast.py | 3 +++ ops/recode_raster/recode_raster.py | 3 +++ ops/recode_raster/test_recode_raster.py | 3 +++ ops/remove_clouds/remove_clouds.py | 3 +++ ops/remove_clouds/test_remove_clouds.py | 3 +++ ops/segment_anything/sam_inference.py | 3 +++ ops/segment_anything/test_sam_inference.py | 3 +++ ops/segment_anything_combine_masks/combine_sam_masks.py | 3 +++ ops/segment_anything_combine_masks/test_combine_sam_masks.py | 3 +++ ops/segment_driveway/segment_driveway.py | 3 +++ ops/select_necessary_coverage_items/filter_items.py | 3 +++ ops/select_necessary_coverage_items/test_filter.py | 3 +++ ops/select_sequence/select_sequence.py | 3 +++ ops/split_sequence/split_sequence.py | 3 +++ ops/split_sequence/test_split_sequence.py | 3 +++ ops/stack_landsat/stack_landsat.py | 3 +++ ops/stack_sentinel2_bands/stack_sentinel2_bands.py | 3 +++ ops/summarize_raster/raster_summary.py | 3 +++ ops/threshold_raster/threshold_raster.py | 3 +++ ops/tile_sentinel1/tile_sentinel1.py | 3 +++ ops/unpack_refs/unpack_refs.py | 3 +++ ops/weed_detection/weed_detection.py | 3 +++ resources/docker/Dockerfile-api_orchestrator | 4 +++- resources/docker/Dockerfile-cache | 4 +++- resources/docker/Dockerfile-dev | 4 +++- resources/docker/Dockerfile-devcontainer | 3 +++ resources/docker/Dockerfile-services-base | 4 +++- resources/docker/Dockerfile-worker | 4 +++- resources/docker/Dockerfile-worker-base | 4 +++- resources/vm/setup_farmvibes_ai_vm.sh | 3 +++ scripts/local-k8s-diagnostics.sh | 3 +++ scripts/setup_python_develop_env.sh | 3 +++ src/tests/__init__.py | 3 +++ src/tests/benchmark/test_spaceeye_ops.py | 3 +++ src/tests/conftest.py | 3 +++ src/tests/test_notebooks.py | 3 +++ src/tests/test_op_workflows_integration.py | 3 +++ src/tests/test_ops_building.py | 3 +++ src/tests/test_rest_api.py | 3 +++ src/tests/test_rest_api_client_integration.py | 3 +++ 
src/tests/test_subprocess_client.py | 3 +++ src/tests/workflows_integration/__init__.py | 3 +++ .../workflows_integration/test_helloworld_integration.py | 3 +++ src/tests_local_cluster/test_cluster_integration.py | 3 +++ src/vibe_agent/setup.py | 3 +++ src/vibe_agent/tests/conftest.py | 3 +++ src/vibe_agent/tests/ops/test_dependencies_integration.py | 3 +++ src/vibe_agent/tests/ops/test_op_cache_builder.py | 3 +++ src/vibe_agent/tests/ops/test_op_parser.py | 3 +++ src/vibe_agent/tests/ops/test_operation.py | 3 +++ .../tests/{test_eywa_asset.py => test_asset_vibe.py} | 3 +++ src/vibe_agent/tests/test_cache_metadata_store.py | 3 +++ src/vibe_agent/tests/test_local_asset_manager.py | 3 +++ src/vibe_agent/tests/test_storage.py | 3 +++ src/vibe_agent/tests/test_uri_handling.py | 3 +++ src/vibe_agent/vibe_agent/__init__.py | 3 +++ src/vibe_agent/vibe_agent/agent_config.py | 3 +++ src/vibe_agent/vibe_agent/cache.py | 3 +++ src/vibe_agent/vibe_agent/cache_metadata_store.py | 3 +++ src/vibe_agent/vibe_agent/cache_metadata_store_client.py | 3 +++ src/vibe_agent/vibe_agent/data_ops.py | 3 +++ src/vibe_agent/vibe_agent/launch_cache.py | 3 +++ src/vibe_agent/vibe_agent/launch_data_ops.py | 3 +++ src/vibe_agent/vibe_agent/launch_worker.py | 3 +++ src/vibe_agent/vibe_agent/ops.py | 3 +++ src/vibe_agent/vibe_agent/ops_helper.py | 3 +++ src/vibe_agent/vibe_agent/storage/__init__.py | 3 +++ src/vibe_agent/vibe_agent/storage/asset_management.py | 3 +++ src/vibe_agent/vibe_agent/storage/file_upload.py | 3 +++ src/vibe_agent/vibe_agent/storage/local_storage.py | 3 +++ src/vibe_agent/vibe_agent/storage/remote_storage.py | 3 +++ src/vibe_agent/vibe_agent/storage/storage.py | 3 +++ src/vibe_agent/vibe_agent/worker.py | 3 +++ src/vibe_common/setup.py | 3 +++ src/vibe_common/tests/conftest.py | 3 +++ src/vibe_common/tests/test_input_handlers.py | 3 +++ src/vibe_common/tests/test_messaging.py | 3 +++ src/vibe_common/tests/test_statestore.py | 3 +++ 
src/vibe_common/tests/test_vibe_dapr_client.py | 3 +++ src/vibe_common/vibe_common/__init__.py | 3 +++ src/vibe_common/vibe_common/constants.py | 3 +++ src/vibe_common/vibe_common/dapr.py | 3 +++ src/vibe_common/vibe_common/dropdapr.py | 3 +++ src/vibe_common/vibe_common/input_handlers.py | 3 +++ src/vibe_common/vibe_common/messaging.py | 3 +++ src/vibe_common/vibe_common/schemas.py | 3 +++ src/vibe_common/vibe_common/secret_provider.py | 3 +++ src/vibe_common/vibe_common/statestore.py | 3 +++ src/vibe_common/vibe_common/telemetry.py | 3 +++ src/vibe_common/vibe_common/tokens.py | 3 +++ src/vibe_common/vibe_common/vibe_dapr_client.py | 3 +++ src/vibe_common/vibe_common/workflow/__init__.py | 3 +++ src/vibe_core/setup.py | 3 +++ src/vibe_core/tests/test_register.py | 3 +++ src/vibe_core/tests/test_stac_converter.py | 3 +++ src/vibe_core/tests/test_type_serialization.py | 3 +++ src/vibe_core/tests/test_utils.py | 3 +++ src/vibe_core/vibe_core/__init__.py | 3 +++ src/vibe_core/vibe_core/admag_client.py | 3 +++ src/vibe_core/vibe_core/cli/__init__.py | 3 +++ src/vibe_core/vibe_core/cli/constants.py | 3 +++ src/vibe_core/vibe_core/cli/help_descriptions.py | 3 +++ src/vibe_core/vibe_core/cli/helper.py | 3 +++ src/vibe_core/vibe_core/cli/local.py | 3 +++ src/vibe_core/vibe_core/cli/logging.py | 3 +++ src/vibe_core/vibe_core/cli/main.py | 3 +++ src/vibe_core/vibe_core/cli/osartifacts.py | 3 +++ src/vibe_core/vibe_core/cli/parsers.py | 3 +++ src/vibe_core/vibe_core/cli/remote.py | 3 +++ src/vibe_core/vibe_core/cli/wrappers.py | 3 +++ src/vibe_core/vibe_core/client.py | 3 +++ src/vibe_core/vibe_core/data/__init__.py | 3 +++ src/vibe_core/vibe_core/data/airbus.py | 3 +++ src/vibe_core/vibe_core/data/core_types.py | 3 +++ src/vibe_core/vibe_core/data/data_registry.py | 3 +++ src/vibe_core/vibe_core/data/farm.py | 3 +++ src/vibe_core/vibe_core/data/json_converter.py | 3 +++ src/vibe_core/vibe_core/data/products.py | 3 +++ src/vibe_core/vibe_core/data/rasters.py | 3 +++ 
src/vibe_core/vibe_core/data/sentinel.py | 3 +++ src/vibe_core/vibe_core/data/utils.py | 3 +++ src/vibe_core/vibe_core/data/weather.py | 3 +++ src/vibe_core/vibe_core/datamodel.py | 3 +++ src/vibe_core/vibe_core/farmvibes_ai_hello_world.py | 3 +++ src/vibe_core/vibe_core/file_downloader.py | 3 +++ src/vibe_core/vibe_core/file_utils.py | 3 +++ src/vibe_core/vibe_core/logconfig.py | 3 +++ src/vibe_core/vibe_core/monitor.py | 3 +++ src/vibe_core/vibe_core/terraform/__init__.py | 3 +++ src/vibe_core/vibe_core/terraform/aks/__init__.py | 3 +++ src/vibe_core/vibe_core/terraform/aks/main.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/__init__.py | 3 +++ .../vibe_core/terraform/aks/modules/infra/azure_monitor.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/keyvault.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/kubernetes.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/outputs.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/providers.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/publicip.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/resourcegroup.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/storage.tf | 3 +++ .../vibe_core/terraform/aks/modules/infra/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/cert.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/dapr.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/init.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/otel.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/outputs.tf | 3 +++ .../terraform/aks/modules/kubernetes/persistentvolume.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/providers.tf | 3 +++ 
.../vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/redis.tf | 3 +++ .../vibe_core/terraform/aks/modules/kubernetes/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/rg/providers.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/rg/random.tf | 3 +++ .../vibe_core/terraform/aks/modules/rg/resourcegroup.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/modules/rg/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/aks/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/local/__init__.py | 3 +++ src/vibe_core/vibe_core/terraform/local/main.tf | 3 +++ src/vibe_core/vibe_core/terraform/local/modules/__init__.py | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/dapr.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/jaeger.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/otel.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/outputs.tf | 3 +++ .../terraform/local/modules/kubernetes/persistentvolume.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/providers.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/redis.tf | 3 +++ .../vibe_core/terraform/local/modules/kubernetes/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/local/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/__init__.py | 3 +++ src/vibe_core/vibe_core/terraform/services/cache.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/dataops.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/init.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/orchestrator.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/providers.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/restapi.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/variables.tf | 3 +++ src/vibe_core/vibe_core/terraform/services/worker.tf | 3 +++ src/vibe_core/vibe_core/testing/__init__.py | 3 +++ 
src/vibe_core/vibe_core/testing/comparison.py | 3 +++ src/vibe_core/vibe_core/uri.py | 3 +++ src/vibe_core/vibe_core/utils.py | 3 +++ src/vibe_dev/setup.py | 3 +++ src/vibe_dev/vibe_dev/__init__.py | 3 +++ src/vibe_dev/vibe_dev/client/__init__.py | 3 +++ src/vibe_dev/vibe_dev/client/remote_client.py | 3 +++ src/vibe_dev/vibe_dev/client/subprocess_client.py | 3 +++ src/vibe_dev/vibe_dev/local_runner.py | 3 +++ src/vibe_dev/vibe_dev/mock_utils.py | 3 +++ src/vibe_dev/vibe_dev/testing/__init__.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py | 3 +++ src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py | 3 +++ src/vibe_dev/vibe_dev/testing/op_tester.py | 3 +++ src/vibe_dev/vibe_dev/testing/storage_fixtures.py | 3 +++ src/vibe_dev/vibe_dev/testing/utils.py | 3 +++ src/vibe_dev/vibe_dev/testing/workflow_fixtures.py | 3 +++ src/vibe_lib/setup.py | 3 +++ src/vibe_lib/tests/test_airbus_api.py | 3 +++ src/vibe_lib/tests/test_earthdata.py | 3 +++ src/vibe_lib/tests/test_predict_chips.py | 3 +++ src/vibe_lib/tests/test_raster_chipping.py | 3 +++ src/vibe_lib/vibe_lib/__init__.py | 3 +++ src/vibe_lib/vibe_lib/airbus.py | 3 +++ src/vibe_lib/vibe_lib/archive.py | 3 +++ src/vibe_lib/vibe_lib/bing_maps.py | 3 +++ src/vibe_lib/vibe_lib/climatology_lab.py | 3 +++ src/vibe_lib/vibe_lib/comet_farm/__init__.py | 3 +++ src/vibe_lib/vibe_lib/comet_farm/comet_model.py | 3 +++ src/vibe_lib/vibe_lib/comet_farm/comet_requester.py | 3 +++ src/vibe_lib/vibe_lib/comet_farm/comet_server.py | 3 +++ src/vibe_lib/vibe_lib/deepmc/encoder.py | 3 +++ src/vibe_lib/vibe_lib/deepmc/helpers.py | 3 +++ src/vibe_lib/vibe_lib/deepmc/locally_connected.py | 3 +++ src/vibe_lib/vibe_lib/deepmc/models.py | 3 +++ 
src/vibe_lib/vibe_lib/deepmc/time.py | 3 +++ src/vibe_lib/vibe_lib/deepmc/transform.py | 3 +++ src/vibe_lib/vibe_lib/earthdata.py | 3 +++ src/vibe_lib/vibe_lib/gaussian_mixture.py | 3 +++ src/vibe_lib/vibe_lib/geometry.py | 3 +++ src/vibe_lib/vibe_lib/gfs_blob_utils.py | 3 +++ src/vibe_lib/vibe_lib/glad.py | 3 +++ src/vibe_lib/vibe_lib/heatmap_neighbor.py | 3 +++ src/vibe_lib/vibe_lib/overlap_clustering.py | 3 +++ src/vibe_lib/vibe_lib/planetary_computer.py | 3 +++ src/vibe_lib/vibe_lib/raster.py | 3 +++ src/vibe_lib/vibe_lib/segment_anything.py | 3 +++ src/vibe_lib/vibe_lib/shapefile.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/__init__.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/chip.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/dataset.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/illumination.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/interpolation.py | 3 +++ src/vibe_lib/vibe_lib/spaceeye/utils.py | 3 +++ src/vibe_lib/vibe_lib/stats.py | 3 +++ src/vibe_lib/vibe_lib/timeseries.py | 3 +++ src/vibe_notebook/setup.py | 3 +++ src/vibe_notebook/vibe_notebook/__init__.py | 3 +++ src/vibe_notebook/vibe_notebook/deepmc/__init__.py | 3 +++ src/vibe_notebook/vibe_notebook/deepmc/forecast.py | 3 +++ src/vibe_notebook/vibe_notebook/deepmc/prediction.py | 3 +++ src/vibe_notebook/vibe_notebook/deepmc/preprocess.py | 3 +++ src/vibe_notebook/vibe_notebook/deepmc/utils.py | 3 +++ src/vibe_notebook/vibe_notebook/plot.py | 3 +++ src/vibe_notebook/vibe_notebook/raster.py | 3 +++ src/vibe_notebook/vibe_notebook/utils.py | 3 +++ src/vibe_server/setup.py | 3 +++ src/vibe_server/tests/conftest.py | 3 +++ src/vibe_server/tests/test_graph.py | 3 +++ src/vibe_server/tests/test_href_handler.py | 3 +++ src/vibe_server/tests/test_op_parallelism.py | 3 +++ src/vibe_server/tests/test_orchestrator.py | 3 +++ src/vibe_server/tests/test_parameter_resolver.py | 3 +++ src/vibe_server/tests/test_remote_workflow_runner.py | 3 +++ src/vibe_server/tests/test_workflow.py | 3 +++ 
src/vibe_server/tests/test_workflow_input_handler.py | 3 +++ src/vibe_server/tests/test_workflow_parser.py | 3 +++ src/vibe_server/tests/test_workflow_runner.py | 3 +++ src/vibe_server/tests/test_workflow_spec_validator.py | 3 +++ src/vibe_server/tests/test_workflow_state.py | 3 +++ src/vibe_server/vibe_server/__init__.py | 3 +++ src/vibe_server/vibe_server/href_handler.py | 3 +++ src/vibe_server/vibe_server/orchestrator.py | 3 +++ src/vibe_server/vibe_server/server.py | 3 +++ src/vibe_server/vibe_server/sniffer.py | 3 +++ src/vibe_server/vibe_server/workflow/__init__.py | 3 +++ src/vibe_server/vibe_server/workflow/description_validator.py | 3 +++ src/vibe_server/vibe_server/workflow/graph.py | 3 +++ src/vibe_server/vibe_server/workflow/input_handler.py | 3 +++ src/vibe_server/vibe_server/workflow/parameter.py | 3 +++ src/vibe_server/vibe_server/workflow/runner/__init__.py | 3 +++ src/vibe_server/vibe_server/workflow/runner/remote_runner.py | 3 +++ src/vibe_server/vibe_server/workflow/runner/runner.py | 3 +++ .../vibe_server/workflow/runner/task_io_handler.py | 3 +++ src/vibe_server/vibe_server/workflow/spec_parser.py | 3 +++ src/vibe_server/vibe_server/workflow/spec_validator.py | 3 +++ src/vibe_server/vibe_server/workflow/workflow.py | 3 +++ 438 files changed, 1314 insertions(+), 6 deletions(-) rename src/vibe_agent/tests/{test_eywa_asset.py => test_asset_vibe.py} (93%) diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh index 46297a55..170f9f2f 100755 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -1,4 +1,7 @@ #!/bin/sh +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + DOCKER_VERSION=24.0.2 VSCODE_HOME=/home/vscode diff --git a/ops/admag/admag_seasonal_field_op.py b/ops/admag/admag_seasonal_field_op.py index 12c98100..e2513fc8 100644 --- a/ops/admag/admag_seasonal_field_op.py +++ b/ops/admag/admag_seasonal_field_op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + from datetime import datetime from typing import Any, Dict, Tuple diff --git a/ops/admag/get_prescription.py b/ops/admag/get_prescription.py index 4b4c9101..5bf9ff41 100644 --- a/ops/admag/get_prescription.py +++ b/ops/admag/get_prescription.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict from vibe_core.admag_client import ADMAgClient diff --git a/ops/admag/list_prescriptions.py b/ops/admag/list_prescriptions.py index d88ed8f0..cb89004f 100644 --- a/ops/admag/list_prescriptions.py +++ b/ops/admag/list_prescriptions.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, List, Tuple from vibe_core.admag_client import ADMAgClient diff --git a/ops/admag/prescriptions.py b/ops/admag/prescriptions.py index 06de40a3..2917d62b 100644 --- a/ops/admag/prescriptions.py +++ b/ops/admag/prescriptions.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from tempfile import TemporaryDirectory from typing import Any, Dict, List, Tuple diff --git a/ops/admag/test_admag.py b/ops/admag/test_admag.py index f21357a1..770e3bfa 100644 --- a/ops/admag/test_admag.py +++ b/ops/admag/test_admag.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import copy import json import os diff --git a/ops/aggregate_statistics_timeseries/aggregate_timeseries.py b/ops/aggregate_statistics_timeseries/aggregate_timeseries.py index d353fb3b..ef2e245b 100644 --- a/ops/aggregate_statistics_timeseries/aggregate_timeseries.py +++ b/ops/aggregate_statistics_timeseries/aggregate_timeseries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Dict, List, cast diff --git a/ops/carbon_local/test_whatif.py b/ops/carbon_local/test_whatif.py index edab312e..e8e45af3 100644 --- a/ops/carbon_local/test_whatif.py +++ b/ops/carbon_local/test_whatif.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import List diff --git a/ops/carbon_local/whatif_comet_local.py b/ops/carbon_local/whatif_comet_local.py index 96546842..d6bfd337 100644 --- a/ops/carbon_local/whatif_comet_local.py +++ b/ops/carbon_local/whatif_comet_local.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import xml.etree.ElementTree as ET from datetime import datetime, timezone from typing import Any, Dict, List diff --git a/ops/chunk_raster/chunk_raster.py b/ops/chunk_raster/chunk_raster.py index a25edcc2..9c51873e 100644 --- a/ops/chunk_raster/chunk_raster.py +++ b/ops/chunk_raster/chunk_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from typing import Any, Dict, List, Tuple, Union diff --git a/ops/clip_raster/clip_raster.py b/ops/clip_raster/clip_raster.py index 9f083dee..c9762a18 100644 --- a/ops/clip_raster/clip_raster.py +++ b/ops/clip_raster/clip_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from tempfile import TemporaryDirectory diff --git a/ops/combine_chunks/combine_chunks.py b/ops/combine_chunks/combine_chunks.py index c7ae7931..cf5955e4 100644 --- a/ops/combine_chunks/combine_chunks.py +++ b/ops/combine_chunks/combine_chunks.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import hashlib import logging import mimetypes diff --git a/ops/compute_cloud_prob/compute_cloud_prob.py b/ops/compute_cloud_prob/compute_cloud_prob.py index d3ab8465..ff4a6fe8 100644 --- a/ops/compute_cloud_prob/compute_cloud_prob.py +++ b/ops/compute_cloud_prob/compute_cloud_prob.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict diff --git a/ops/compute_cloud_water_mask/compute_cloud_water_mask.py b/ops/compute_cloud_water_mask/compute_cloud_water_mask.py index 7df295b8..f687e000 100644 --- a/ops/compute_cloud_water_mask/compute_cloud_water_mask.py +++ b/ops/compute_cloud_water_mask/compute_cloud_water_mask.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/compute_conservation_practice/compute_conservation_practice.py b/ops/compute_conservation_practice/compute_conservation_practice.py index 0f6e6352..4865f11e 100644 --- a/ops/compute_conservation_practice/compute_conservation_practice.py +++ b/ops/compute_conservation_practice/compute_conservation_practice.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict diff --git a/ops/compute_evaporative_fraction/compute_evaporative_fraction.py b/ops/compute_evaporative_fraction/compute_evaporative_fraction.py index 0d74d261..7adb7f70 100644 --- a/ops/compute_evaporative_fraction/compute_evaporative_fraction.py +++ b/ops/compute_evaporative_fraction/compute_evaporative_fraction.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Any, Dict, cast diff --git a/ops/compute_fcover/fcover.py b/ops/compute_fcover/fcover.py index 7618b018..9766a9a3 100644 --- a/ops/compute_fcover/fcover.py +++ b/ops/compute_fcover/fcover.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ FCOVER computation using a neural network as described in https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf diff --git a/ops/compute_ghg_fluxes/compute_ghg_fluxes.py b/ops/compute_ghg_fluxes/compute_ghg_fluxes.py index 53b704ef..137665a4 100644 --- a/ops/compute_ghg_fluxes/compute_ghg_fluxes.py +++ b/ops/compute_ghg_fluxes/compute_ghg_fluxes.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from copy import copy from dataclasses import asdict, dataclass from enum import Enum, IntEnum, auto diff --git a/ops/compute_ghg_fluxes/test_ghg_fluxes.py b/ops/compute_ghg_fluxes/test_ghg_fluxes.py index 4c7bee80..73264f64 100644 --- a/ops/compute_ghg_fluxes/test_ghg_fluxes.py +++ b/ops/compute_ghg_fluxes/test_ghg_fluxes.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from math import isclose diff --git a/ops/compute_illuminance/compute_illuminance.py b/ops/compute_illuminance/compute_illuminance.py index 90b14eb0..e36a78a5 100644 --- a/ops/compute_illuminance/compute_illuminance.py +++ b/ops/compute_illuminance/compute_illuminance.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # pyright: reportUnknownMemberType=false from typing import Dict, List, Union, cast diff --git a/ops/compute_index/index.py b/ops/compute_index/index.py index f6eefda4..f9851bac 100644 --- a/ops/compute_index/index.py +++ b/ops/compute_index/index.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from collections import defaultdict from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, List, cast diff --git a/ops/compute_index/test_index.py b/ops/compute_index/test_index.py index 114daa98..30eb4c72 100644 --- a/ops/compute_index/test_index.py +++ b/ops/compute_index/test_index.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/compute_irrigation_probability/compute_irrigation_probability.py b/ops/compute_irrigation_probability/compute_irrigation_probability.py index b13fb3e2..04f0f9fb 100644 --- a/ops/compute_irrigation_probability/compute_irrigation_probability.py +++ b/ops/compute_irrigation_probability/compute_irrigation_probability.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py index c2a529d6..ad7d2c38 100644 --- a/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py +++ b/ops/compute_ngi_egi_layers/compute_ngi_egi_layers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/compute_onnx/compute_onnx.py b/ops/compute_onnx/compute_onnx.py index e4294a47..25d4fe66 100644 --- a/ops/compute_onnx/compute_onnx.py +++ b/ops/compute_onnx/compute_onnx.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Dict, List, Optional, Union diff --git a/ops/compute_onnx/test_compute_onnx.py b/ops/compute_onnx/test_compute_onnx.py index eeb173b9..148571f7 100644 --- a/ops/compute_onnx/test_compute_onnx.py +++ b/ops/compute_onnx/test_compute_onnx.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from datetime import datetime, timedelta diff --git a/ops/compute_onnx/test_compute_onnx_chunk.py b/ops/compute_onnx/test_compute_onnx_chunk.py index 8ec5922b..f487073c 100644 --- a/ops/compute_onnx/test_compute_onnx_chunk.py +++ b/ops/compute_onnx/test_compute_onnx_chunk.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timedelta from pathlib import Path diff --git a/ops/compute_pixel_count/compute_pixel_count.py b/ops/compute_pixel_count/compute_pixel_count.py index 110fbe79..a6910254 100644 --- a/ops/compute_pixel_count/compute_pixel_count.py +++ b/ops/compute_pixel_count/compute_pixel_count.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict diff --git a/ops/compute_pixel_count/test_compute_pixel_count.py b/ops/compute_pixel_count/test_compute_pixel_count.py index 138a00c0..5016dc5f 100644 --- a/ops/compute_pixel_count/test_compute_pixel_count.py +++ b/ops/compute_pixel_count/test_compute_pixel_count.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime from typing import cast diff --git a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py index 3ef7c532..e83fdacd 100644 --- a/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py +++ b/ops/compute_raster_class_windowed_average/compute_raster_class_windowed_average.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from tempfile import TemporaryDirectory from typing import Any, Dict, List diff --git a/ops/compute_raster_cluster/compute_raster_cluster.py b/ops/compute_raster_cluster/compute_raster_cluster.py index 9dfbe2ab..1537d990 100644 --- a/ops/compute_raster_cluster/compute_raster_cluster.py +++ b/ops/compute_raster_cluster/compute_raster_cluster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from tempfile import TemporaryDirectory from typing import Any, Dict diff --git a/ops/compute_raster_gradient/compute_raster_gradient.py b/ops/compute_raster_gradient/compute_raster_gradient.py index e6fc94cf..d1e35255 100644 --- a/ops/compute_raster_gradient/compute_raster_gradient.py +++ b/ops/compute_raster_gradient/compute_raster_gradient.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/compute_shadow_prob/compute_shadow_prob.py b/ops/compute_shadow_prob/compute_shadow_prob.py index 6a6880eb..3576693b 100644 --- a/ops/compute_shadow_prob/compute_shadow_prob.py +++ b/ops/compute_shadow_prob/compute_shadow_prob.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Any, Dict diff --git a/ops/create_raster_sequence/create_raster_sequence.py b/ops/create_raster_sequence/create_raster_sequence.py index 5042c754..14d681ea 100644 --- a/ops/create_raster_sequence/create_raster_sequence.py +++ b/ops/create_raster_sequence/create_raster_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Any, Dict, List, Tuple, Union diff --git a/ops/datavibe_filter/datavibe_filter.py b/ops/datavibe_filter/datavibe_filter.py index 84d0d0ba..96bdaa44 100644 --- a/ops/datavibe_filter/datavibe_filter.py +++ b/ops/datavibe_filter/datavibe_filter.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from functools import partial from typing import Dict diff --git a/ops/detect_driveway/detect_driveway.py b/ops/detect_driveway/detect_driveway.py index db1e69ce..cb56cf65 100644 --- a/ops/detect_driveway/detect_driveway.py +++ b/ops/detect_driveway/detect_driveway.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict, List, Optional, Tuple, cast diff --git a/ops/detect_outliers/detect_outliers.py b/ops/detect_outliers/detect_outliers.py index 68f2d176..c35bffa5 100644 --- a/ops/detect_outliers/detect_outliers.py +++ b/ops/detect_outliers/detect_outliers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from datetime import datetime from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, List, Sequence, Tuple, Union, cast diff --git a/ops/download_airbus/download_airbus.py b/ops/download_airbus/download_airbus.py index 9ed1d39d..f7bc601a 100644 --- a/ops/download_airbus/download_airbus.py +++ b/ops/download_airbus/download_airbus.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import re from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/download_alos/download_alos.py b/ops/download_alos/download_alos.py index a1fe00d9..66227133 100644 --- a/ops/download_alos/download_alos.py +++ b/ops/download_alos/download_alos.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/download_alos/test_download_alos.py b/ops/download_alos/test_download_alos.py index 540eb555..3ae53dc8 100644 --- a/ops/download_alos/test_download_alos.py +++ b/ops/download_alos/test_download_alos.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import cast diff --git a/ops/download_ambient_weather/download_ambient_weather.py b/ops/download_ambient_weather/download_ambient_weather.py index c55e3321..8353af4a 100644 --- a/ops/download_ambient_weather/download_ambient_weather.py +++ b/ops/download_ambient_weather/download_ambient_weather.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging import mimetypes import os diff --git a/ops/download_bing_basemap/download_bing_basemap.py b/ops/download_bing_basemap/download_bing_basemap.py index acfc7bd1..ac90533f 100644 --- a/ops/download_bing_basemap/download_bing_basemap.py +++ b/ops/download_bing_basemap/download_bing_basemap.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import os from tempfile import TemporaryDirectory diff --git a/ops/download_bing_basemap/test_download_bing_basemap.py b/ops/download_bing_basemap/test_download_bing_basemap.py index 865e98b8..9c506b59 100644 --- a/ops/download_bing_basemap/test_download_bing_basemap.py +++ b/ops/download_bing_basemap/test_download_bing_basemap.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from unittest.mock import MagicMock, patch diff --git a/ops/download_cdl_data/download_cdl.py b/ops/download_cdl_data/download_cdl.py index 6085c660..02d1e1da 100644 --- a/ops/download_cdl_data/download_cdl.py +++ b/ops/download_cdl_data/download_cdl.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_cdl_data/download_cdl_data.py b/ops/download_cdl_data/download_cdl_data.py index 8c6eb7bc..1bf4c98f 100644 --- a/ops/download_cdl_data/download_cdl_data.py +++ b/ops/download_cdl_data/download_cdl_data.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os import xml.etree.ElementTree as ET diff --git a/ops/download_chirps/download_chirps.py b/ops/download_chirps/download_chirps.py index 30f65c09..37369df0 100644 --- a/ops/download_chirps/download_chirps.py +++ b/ops/download_chirps/download_chirps.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging import os import re diff --git a/ops/download_climatology_lab/download_climatology_lab.py b/ops/download_climatology_lab/download_climatology_lab.py index 16181ca1..6bfcf227 100644 --- a/ops/download_climatology_lab/download_climatology_lab.py +++ b/ops/download_climatology_lab/download_climatology_lab.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_climatology_lab/test_download_climatology_lab.py b/ops/download_climatology_lab/test_download_climatology_lab.py index 705965e2..6d8b9a02 100644 --- a/ops/download_climatology_lab/test_download_climatology_lab.py +++ b/ops/download_climatology_lab/test_download_climatology_lab.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from unittest.mock import MagicMock, patch diff --git a/ops/download_dem/download_dem.py b/ops/download_dem/download_dem.py index 64905300..e37990e0 100644 --- a/ops/download_dem/download_dem.py +++ b/ops/download_dem/download_dem.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_dem/test_download_dem.py b/ops/download_dem/test_download_dem.py index 64893a18..6663847c 100644 --- a/ops/download_dem/test_download_dem.py +++ b/ops/download_dem/test_download_dem.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from unittest.mock import MagicMock, patch diff --git a/ops/download_era5/download_era5.py b/ops/download_era5/download_era5.py index 7460e11c..75d46395 100644 --- a/ops/download_era5/download_era5.py +++ b/ops/download_era5/download_era5.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + import logging import mimetypes import os diff --git a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py index 230b0fc7..29f958bd 100644 --- a/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py +++ b/ops/download_esri_landuse_landcover/download_esri_landuse_landcover.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py b/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py index 950cad34..9068a0cf 100644 --- a/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py +++ b/ops/download_esri_landuse_landcover/test_download_esri_landuse_landcover.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from unittest.mock import MagicMock, patch diff --git a/ops/download_from_ref/download_from_ref.py b/ops/download_from_ref/download_from_ref.py index b9af2ac9..d63cae43 100644 --- a/ops/download_from_ref/download_from_ref.py +++ b/ops/download_from_ref/download_from_ref.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import mimetypes import os diff --git a/ops/download_from_smb/download_rasters_from_smb.py b/ops/download_from_smb/download_rasters_from_smb.py index 4a0fe263..12ede0b0 100644 --- a/ops/download_from_smb/download_rasters_from_smb.py +++ b/ops/download_from_smb/download_rasters_from_smb.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import mimetypes from pathlib import Path from tempfile import TemporaryDirectory diff --git a/ops/download_gedi_product/download_gedi_product.py b/ops/download_gedi_product/download_gedi_product.py index 1359a652..6e8eead8 100644 --- a/ops/download_gedi_product/download_gedi_product.py +++ b/ops/download_gedi_product/download_gedi_product.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from tempfile import TemporaryDirectory diff --git a/ops/download_gedi_product/test_download_gedi_product.py b/ops/download_gedi_product/test_download_gedi_product.py index 69c070d9..73a371bc 100644 --- a/ops/download_gedi_product/test_download_gedi_product.py +++ b/ops/download_gedi_product/test_download_gedi_product.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import Any, cast diff --git a/ops/download_glad_data/download_glad.py b/ops/download_glad_data/download_glad.py index 3cd2f2b0..e567d445 100644 --- a/ops/download_glad_data/download_glad.py +++ b/ops/download_glad_data/download_glad.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_glad_data/test_download_glad_product.py b/ops/download_glad_data/test_download_glad_product.py index 34c1dfc8..dcb6211a 100644 --- a/ops/download_glad_data/test_download_glad_product.py +++ b/ops/download_glad_data/test_download_glad_product.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime from typing import cast diff --git a/ops/download_gnatsgo/download_gnatsgo_raster.py b/ops/download_gnatsgo/download_gnatsgo_raster.py index 4663e874..bb25eefc 100644 --- a/ops/download_gnatsgo/download_gnatsgo_raster.py +++ b/ops/download_gnatsgo/download_gnatsgo_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict, Optional diff --git a/ops/download_gnatsgo/test_download_gnatsgo.py b/ops/download_gnatsgo/test_download_gnatsgo.py index 5e621e7f..d36c8422 100644 --- a/ops/download_gnatsgo/test_download_gnatsgo.py +++ b/ops/download_gnatsgo/test_download_gnatsgo.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from unittest.mock import MagicMock, patch diff --git a/ops/download_hansen/download_hansen.py b/ops/download_hansen/download_hansen.py index 9a6e43b0..058e8199 100644 --- a/ops/download_hansen/download_hansen.py +++ b/ops/download_hansen/download_hansen.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_hansen/test_download_hansen_product.py b/ops/download_hansen/test_download_hansen_product.py index 8ced9f4f..20ecc5fa 100644 --- a/ops/download_hansen/test_download_hansen_product.py +++ b/ops/download_hansen/test_download_hansen_product.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import List, cast diff --git a/ops/download_herbie/download_herbie.py b/ops/download_herbie/download_herbie.py index ebf4e567..0bc06504 100644 --- a/ops/download_herbie/download_herbie.py +++ b/ops/download_herbie/download_herbie.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + from datetime import timedelta from tempfile import TemporaryDirectory from typing import Dict, Optional diff --git a/ops/download_herbie/forecast_range_split.py b/ops/download_herbie/forecast_range_split.py index b152f106..2d1df602 100644 --- a/ops/download_herbie/forecast_range_split.py +++ b/ops/download_herbie/forecast_range_split.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List import pandas as pd diff --git a/ops/download_herbie/forecast_weather.py b/ops/download_herbie/forecast_weather.py index 20e6282c..219c3b50 100644 --- a/ops/download_herbie/forecast_weather.py +++ b/ops/download_herbie/forecast_weather.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import shutil import tempfile diff --git a/ops/download_landsat_from_pc/download_landsat_pc.py b/ops/download_landsat_from_pc/download_landsat_pc.py index 3778b948..d5e7feea 100644 --- a/ops/download_landsat_from_pc/download_landsat_pc.py +++ b/ops/download_landsat_from_pc/download_landsat_pc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from tempfile import TemporaryDirectory from typing import Dict, Optional diff --git a/ops/download_modis_sr/download_modis_sr.py b/ops/download_modis_sr/download_modis_sr.py index c1b328d7..bc2d9c7e 100644 --- a/ops/download_modis_sr/download_modis_sr.py +++ b/ops/download_modis_sr/download_modis_sr.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from tempfile import TemporaryDirectory from typing import Dict, Optional diff --git a/ops/download_modis_vegetation/download_modis_vegetation.py b/ops/download_modis_vegetation/download_modis_vegetation.py index f6afa9fe..e4b7f7ec 100644 --- a/ops/download_modis_vegetation/download_modis_vegetation.py +++ b/ops/download_modis_vegetation/download_modis_vegetation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Dict, Optional diff --git a/ops/download_modis_vegetation/test_download_modis_vegetation.py b/ops/download_modis_vegetation/test_download_modis_vegetation.py index 8c1079ff..46508c0a 100644 --- a/ops/download_modis_vegetation/test_download_modis_vegetation.py +++ b/ops/download_modis_vegetation/test_download_modis_vegetation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from unittest.mock import MagicMock, patch diff --git a/ops/download_naip/download_naip.py b/ops/download_naip/download_naip.py index 23682e75..b14c3e29 100644 --- a/ops/download_naip/download_naip.py +++ b/ops/download_naip/download_naip.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/download_naip/test_download_naip.py b/ops/download_naip/test_download_naip.py index 49f7931e..48131738 100644 --- a/ops/download_naip/test_download_naip.py +++ b/ops/download_naip/test_download_naip.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime, timezone from unittest.mock import MagicMock, patch diff --git a/ops/download_road_geometries/download_road_geometries.py b/ops/download_road_geometries/download_road_geometries.py index 0c30c766..a1142c31 100644 --- a/ops/download_road_geometries/download_road_geometries.py +++ b/ops/download_road_geometries/download_road_geometries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict, cast diff --git a/ops/download_sentinel1/download_sentinel1_rtc.py b/ops/download_sentinel1/download_sentinel1_rtc.py index d5dea014..46c351fd 100644 --- a/ops/download_sentinel1/download_sentinel1_rtc.py +++ b/ops/download_sentinel1/download_sentinel1_rtc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from concurrent.futures import TimeoutError diff --git a/ops/download_sentinel1/test_download_sentinel1_rtc.py b/ops/download_sentinel1/test_download_sentinel1_rtc.py index 2e99bd64..5648e94e 100644 --- a/ops/download_sentinel1/test_download_sentinel1_rtc.py +++ b/ops/download_sentinel1/test_download_sentinel1_rtc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from pathlib import Path diff --git a/ops/download_sentinel1_grd/download_sentinel1_grd.py b/ops/download_sentinel1_grd/download_sentinel1_grd.py index 6e06c65e..66f16fa1 100644 --- a/ops/download_sentinel1_grd/download_sentinel1_grd.py +++ b/ops/download_sentinel1_grd/download_sentinel1_grd.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging import os import shutil diff --git a/ops/download_sentinel1_grd/test_download_sentinel1.py b/ops/download_sentinel1_grd/test_download_sentinel1.py index e1d14290..798f36c1 100644 --- a/ops/download_sentinel1_grd/test_download_sentinel1.py +++ b/ops/download_sentinel1_grd/test_download_sentinel1.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from unittest.mock import Mock, patch diff --git a/ops/download_sentinel2_from_pc/download_s2_pc.py b/ops/download_sentinel2_from_pc/download_s2_pc.py index c327ffc0..f47fa23a 100644 --- a/ops/download_sentinel2_from_pc/download_s2_pc.py +++ b/ops/download_sentinel2_from_pc/download_s2_pc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os import re diff --git a/ops/download_soilgrids/download_soilgrids.py b/ops/download_soilgrids/download_soilgrids.py index c4a7adf2..db48457d 100644 --- a/ops/download_soilgrids/download_soilgrids.py +++ b/ops/download_soilgrids/download_soilgrids.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import mimetypes import os diff --git a/ops/download_stack_sentinel2/download_stack_s2.py b/ops/download_stack_sentinel2/download_stack_s2.py index 381790fa..47819cde 100644 --- a/ops/download_stack_sentinel2/download_stack_s2.py +++ b/ops/download_stack_sentinel2/download_stack_s2.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import logging import os diff --git a/ops/download_usda_soils/download_usda_soils.py b/ops/download_usda_soils/download_usda_soils.py index d31c8bba..9c52bc02 100644 --- a/ops/download_usda_soils/download_usda_soils.py +++ b/ops/download_usda_soils/download_usda_soils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os import zipfile from datetime import datetime diff --git a/ops/ensemble_cloud_prob/ensemble_cloud_prob.py b/ops/ensemble_cloud_prob/ensemble_cloud_prob.py index 50367e97..86529f16 100644 --- a/ops/ensemble_cloud_prob/ensemble_cloud_prob.py +++ b/ops/ensemble_cloud_prob/ensemble_cloud_prob.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/estimate_canopy_cover/estimate_canopy.py b/ops/estimate_canopy_cover/estimate_canopy.py index 504a40cc..39aae687 100644 --- a/ops/estimate_canopy_cover/estimate_canopy.py +++ b/ops/estimate_canopy_cover/estimate_canopy.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Any, Dict, List diff --git a/ops/extract_gedi_rh100/extract_gedi_rh100.py b/ops/extract_gedi_rh100/extract_gedi_rh100.py index af65d4b8..1da1b506 100644 --- a/ops/extract_gedi_rh100/extract_gedi_rh100.py +++ b/ops/extract_gedi_rh100/extract_gedi_rh100.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from collections import defaultdict diff --git a/ops/extract_gedi_rh100/test_extract_gedi_rh100.py b/ops/extract_gedi_rh100/test_extract_gedi_rh100.py index fba0cbda..8af3add8 100644 --- a/ops/extract_gedi_rh100/test_extract_gedi_rh100.py +++ b/ops/extract_gedi_rh100/test_extract_gedi_rh100.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime from pathlib import Path diff --git a/ops/extract_protein_sequence/extract_protein_sequence.py b/ops/extract_protein_sequence/extract_protein_sequence.py index bf0418c0..41a4ebb8 100644 --- a/ops/extract_protein_sequence/extract_protein_sequence.py +++ b/ops/extract_protein_sequence/extract_protein_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/get_angles/get_angles.py b/ops/get_angles/get_angles.py index 68470285..78709d39 100644 --- a/ops/get_angles/get_angles.py +++ b/ops/get_angles/get_angles.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import io import mimetypes import os diff --git a/ops/gfs_download/gfs_download.py b/ops/gfs_download/gfs_download.py index 953f53ae..c228a027 100644 --- a/ops/gfs_download/gfs_download.py +++ b/ops/gfs_download/gfs_download.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from datetime import datetime diff --git a/ops/gfs_preprocess/gfs_preprocess.py b/ops/gfs_preprocess/gfs_preprocess.py index 71838008..d1e9904b 100644 --- a/ops/gfs_preprocess/gfs_preprocess.py +++ b/ops/gfs_preprocess/gfs_preprocess.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from datetime import datetime, time, timedelta, timezone from typing import Any, Dict, List, Tuple, Union diff --git a/ops/group_rasters_by_geometries/group_rasters_by_geometries.py b/ops/group_rasters_by_geometries/group_rasters_by_geometries.py index b36a5045..1337e3d7 100644 --- a/ops/group_rasters_by_geometries/group_rasters_by_geometries.py +++ b/ops/group_rasters_by_geometries/group_rasters_by_geometries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import hashlib from functools import partial from typing import Dict, List diff --git a/ops/group_rasters_by_time/group_rasters_by_time.py b/ops/group_rasters_by_time/group_rasters_by_time.py index 2e237cdb..ee0da933 100644 --- a/ops/group_rasters_by_time/group_rasters_by_time.py +++ b/ops/group_rasters_by_time/group_rasters_by_time.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from functools import partial from itertools import groupby from typing import Dict, List diff --git a/ops/group_rasters_by_time/test_group_rasters_by_time.py b/ops/group_rasters_by_time/test_group_rasters_by_time.py index a380b9f2..57cda8d2 100644 --- a/ops/group_rasters_by_time/test_group_rasters_by_time.py +++ b/ops/group_rasters_by_time/test_group_rasters_by_time.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from datetime import datetime, timedelta diff --git a/ops/group_sentinel1_orbits/group_sentinel1_orbits.py b/ops/group_sentinel1_orbits/group_sentinel1_orbits.py index 61b74910..735d1021 100644 --- a/ops/group_sentinel1_orbits/group_sentinel1_orbits.py +++ b/ops/group_sentinel1_orbits/group_sentinel1_orbits.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from collections import defaultdict from typing import Dict, List, Tuple diff --git a/ops/group_sentinel2_orbits/group_sentinel2_orbits.py b/ops/group_sentinel2_orbits/group_sentinel2_orbits.py index 1275c86f..0f136154 100644 --- a/ops/group_sentinel2_orbits/group_sentinel2_orbits.py +++ b/ops/group_sentinel2_orbits/group_sentinel2_orbits.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import hashlib from collections import defaultdict from typing import Dict, List, Tuple, Union, cast diff --git a/ops/group_tile_sequence/group_tile_sequence.py b/ops/group_tile_sequence/group_tile_sequence.py index d0fc8bb2..ca59ea41 100644 --- a/ops/group_tile_sequence/group_tile_sequence.py +++ b/ops/group_tile_sequence/group_tile_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import logging from collections import defaultdict diff --git a/ops/heatmap_sensor/soil_sample_heatmap_using_classification.py b/ops/heatmap_sensor/soil_sample_heatmap_using_classification.py index 3d15e4a6..794b4788 100644 --- a/ops/heatmap_sensor/soil_sample_heatmap_using_classification.py +++ b/ops/heatmap_sensor/soil_sample_heatmap_using_classification.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Any, Dict, Optional, cast diff --git a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py b/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py index 59ba93f7..65d7ea6d 100644 --- a/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py +++ b/ops/heatmap_sensor/soil_sample_heatmap_using_neighbors.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict, List, Tuple, cast diff --git a/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py b/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py index 62b54d0a..3defa642 100644 --- a/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py +++ b/ops/heatmap_sensor/test_soil_cluster_sample_heatmap_using_neighbors.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os import time from datetime import datetime diff --git a/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py b/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py index 5349c30e..cbeb89f0 100644 --- a/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py +++ b/ops/heatmap_sensor/test_soil_sample_heatmap_using_classification.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import Any, Dict, Union, cast diff --git a/ops/helloworld/helloworld.py b/ops/helloworld/helloworld.py index c9e33e50..d4ea8aa1 100644 --- a/ops/helloworld/helloworld.py +++ b/ops/helloworld/helloworld.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict, List, Union diff --git a/ops/linear_trend/linear_trend.py b/ops/linear_trend/linear_trend.py index 2390ecde..2ff2dd34 100644 --- a/ops/linear_trend/linear_trend.py +++ b/ops/linear_trend/linear_trend.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from tempfile import TemporaryDirectory from typing import Dict, List, Tuple diff --git a/ops/linear_trend/test_linear_trend.py b/ops/linear_trend/test_linear_trend.py index 544d09d9..c38db869 100644 --- a/ops/linear_trend/test_linear_trend.py +++ b/ops/linear_trend/test_linear_trend.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import numpy as np import xarray as xr from linear_trend import fit_model_in_bulk diff --git a/ops/list_airbus_products/list_airbus.py b/ops/list_airbus_products/list_airbus.py index 5daa1a7c..b5f4a373 100644 --- a/ops/list_airbus_products/list_airbus.py +++ b/ops/list_airbus_products/list_airbus.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from datetime import datetime from tempfile import TemporaryDirectory from typing import Any, Dict, List diff --git a/ops/list_alos_products/list_alos_products.py b/ops/list_alos_products/list_alos_products.py index afc09a97..436aa871 100644 --- a/ops/list_alos_products/list_alos_products.py +++ b/ops/list_alos_products/list_alos_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, List, cast from dateutil.parser import parse diff --git a/ops/list_alos_products/test_alos_list.py b/ops/list_alos_products/test_alos_list.py index 9596a898..fefb920b 100644 --- a/ops/list_alos_products/test_alos_list.py +++ b/ops/list_alos_products/test_alos_list.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import Any, Dict, Tuple diff --git a/ops/list_bing_maps/list_bing_maps.py b/ops/list_bing_maps/list_bing_maps.py index 766e6090..d37da255 100644 --- a/ops/list_bing_maps/list_bing_maps.py +++ b/ops/list_bing_maps/list_bing_maps.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from datetime import datetime from typing import Dict, List, Optional diff --git a/ops/list_bing_maps/test_list_bing_maps.py b/ops/list_bing_maps/test_list_bing_maps.py index 86cd9bcc..4c0699af 100644 --- a/ops/list_bing_maps/test_list_bing_maps.py +++ b/ops/list_bing_maps/test_list_bing_maps.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import List, Optional, cast diff --git a/ops/list_cdl_products/list_cdl_products.py b/ops/list_cdl_products/list_cdl_products.py index 59caf831..d8b18d3c 100644 --- a/ops/list_cdl_products/list_cdl_products.py +++ b/ops/list_cdl_products/list_cdl_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + # This op receives a date range and geometry and list the respective CDL products from datetime import datetime from typing import Dict, List diff --git a/ops/list_chirps/list_chirps.py b/ops/list_chirps/list_chirps.py index abb41701..53c0c98e 100644 --- a/ops/list_chirps/list_chirps.py +++ b/ops/list_chirps/list_chirps.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from calendar import monthrange from datetime import datetime, timedelta, timezone diff --git a/ops/list_climatology_lab/list_climatology_lab.py b/ops/list_climatology_lab/list_climatology_lab.py index 7c87cea3..d1cd5d72 100644 --- a/ops/list_climatology_lab/list_climatology_lab.py +++ b/ops/list_climatology_lab/list_climatology_lab.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Dict, List diff --git a/ops/list_climatology_lab/test_list_climatology_lab.py b/ops/list_climatology_lab/test_list_climatology_lab.py index 709d7f0f..fcf09677 100644 --- a/ops/list_climatology_lab/test_list_climatology_lab.py +++ b/ops/list_climatology_lab/test_list_climatology_lab.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/list_dem_products/list_dem_products.py b/ops/list_dem_products/list_dem_products.py index 2405e76c..d5020ecb 100644 --- a/ops/list_dem_products/list_dem_products.py +++ b/ops/list_dem_products/list_dem_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # This operator receives a region and obtains the digital elevation model # items associated with the input region. 
The collection 3dep-seamless # only covers CONUS (continental us) and contains tiles with distinct diff --git a/ops/list_dem_products/test_list_dem_products.py b/ops/list_dem_products/test_list_dem_products.py index 64d04bb2..bfb3b280 100644 --- a/ops/list_dem_products/test_list_dem_products.py +++ b/ops/list_dem_products/test_list_dem_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/list_era5/list_era5.py b/ops/list_era5/list_era5.py index 6535efb5..7d4972d8 100644 --- a/ops/list_era5/list_era5.py +++ b/ops/list_era5/list_era5.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from functools import partial from typing import Any, Dict, List diff --git a/ops/list_era5/list_era5_cds.py b/ops/list_era5/list_era5_cds.py index 59e21e07..5a6d0fb9 100644 --- a/ops/list_era5/list_era5_cds.py +++ b/ops/list_era5/list_era5_cds.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from datetime import datetime from functools import partial diff --git a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py index 2a06cf7a..d66c81d7 100644 --- a/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py +++ b/ops/list_esri_landuse_landcover/list_esri_landuse_landcover.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from typing import Any, Dict, List, cast from dateutil.parser import isoparse diff --git a/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py b/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py index 240b7714..b56ba7a2 100644 --- a/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py +++ b/ops/list_esri_landuse_landcover/test_list_esri_landuse_landcover.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/list_gedi_products/list_gedi_products.py b/ops/list_gedi_products/list_gedi_products.py index dcc415a3..bf6ee014 100644 --- a/ops/list_gedi_products/list_gedi_products.py +++ b/ops/list_gedi_products/list_gedi_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from typing import Any, Dict, List diff --git a/ops/list_gedi_products/test_list_gedi_products.py b/ops/list_gedi_products/test_list_gedi_products.py index fdaaa9ae..f73a56d7 100644 --- a/ops/list_gedi_products/test_list_gedi_products.py +++ b/ops/list_gedi_products/test_list_gedi_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import os from datetime import datetime diff --git a/ops/list_glad_products/list_glad_products.py b/ops/list_glad_products/list_glad_products.py index 6820fd77..4f439f6b 100644 --- a/ops/list_glad_products/list_glad_products.py +++ b/ops/list_glad_products/list_glad_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import hashlib import itertools from datetime import datetime diff --git a/ops/list_glad_products/test_glad_list.py b/ops/list_glad_products/test_glad_list.py index 4e98fdba..83567682 100644 --- a/ops/list_glad_products/test_glad_list.py +++ b/ops/list_glad_products/test_glad_list.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import itertools import os from datetime import datetime diff --git a/ops/list_gnatsgo_products/list_gnatsgo_products.py b/ops/list_gnatsgo_products/list_gnatsgo_products.py index a11c26d3..ff40cdb5 100644 --- a/ops/list_gnatsgo_products/list_gnatsgo_products.py +++ b/ops/list_gnatsgo_products/list_gnatsgo_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List from pystac import Item diff --git a/ops/list_gnatsgo_products/test_list_gnatsgo_products.py b/ops/list_gnatsgo_products/test_list_gnatsgo_products.py index eb3e276c..e5ff53eb 100644 --- a/ops/list_gnatsgo_products/test_list_gnatsgo_products.py +++ b/ops/list_gnatsgo_products/test_list_gnatsgo_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/list_hansen_products/list_hansen_products.py b/ops/list_hansen_products/list_hansen_products.py index 5bc296ad..e42fa62d 100644 --- a/ops/list_hansen_products/list_hansen_products.py +++ b/ops/list_hansen_products/list_hansen_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import hashlib from datetime import datetime from typing import Dict, List, cast diff --git a/ops/list_hansen_products/test_hansen_list.py b/ops/list_hansen_products/test_hansen_list.py index b7367616..c088135c 100644 --- a/ops/list_hansen_products/test_hansen_list.py +++ b/ops/list_hansen_products/test_hansen_list.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import itertools import os from datetime import datetime diff --git a/ops/list_herbie/list_herbie.py b/ops/list_herbie/list_herbie.py index 4509a70e..b05f84e6 100644 --- a/ops/list_herbie/list_herbie.py +++ b/ops/list_herbie/list_herbie.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from datetime import datetime from typing import Dict, List, Optional diff --git a/ops/list_landsat_products_pc/list_landsat_pc.py b/ops/list_landsat_products_pc/list_landsat_pc.py index 40483448..a00c0568 100644 --- a/ops/list_landsat_products_pc/list_landsat_pc.py +++ b/ops/list_landsat_products_pc/list_landsat_pc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, List from dateutil.parser import isoparse diff --git a/ops/list_modis_sr/list_modis_sr.py b/ops/list_modis_sr/list_modis_sr.py index 2611299f..299aad09 100644 --- a/ops/list_modis_sr/list_modis_sr.py +++ b/ops/list_modis_sr/list_modis_sr.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List from dateutil.parser import parse diff --git a/ops/list_modis_vegetation/list_modis_vegetation.py b/ops/list_modis_vegetation/list_modis_vegetation.py index 91c9ddf4..6c504f95 100644 --- a/ops/list_modis_vegetation/list_modis_vegetation.py +++ b/ops/list_modis_vegetation/list_modis_vegetation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from typing import Dict, List from dateutil.parser import parse diff --git a/ops/list_modis_vegetation/test_list_modis_vegetation.py b/ops/list_modis_vegetation/test_list_modis_vegetation.py index 23bff6f4..1062023a 100644 --- a/ops/list_modis_vegetation/test_list_modis_vegetation.py +++ b/ops/list_modis_vegetation/test_list_modis_vegetation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from unittest.mock import MagicMock, patch diff --git a/ops/list_naip_products/list_naip_products.py b/ops/list_naip_products/list_naip_products.py index ac22fea0..5125d7a9 100644 --- a/ops/list_naip_products/list_naip_products.py +++ b/ops/list_naip_products/list_naip_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # This operator receives a region and a date range and obtains the respective # NAIP items, returning a list of NaipProduct. from typing import Any, Dict, List, Tuple, cast diff --git a/ops/list_naip_products/test_list_naip_products.py b/ops/list_naip_products/test_list_naip_products.py index 3162638b..3f8764b5 100644 --- a/ops/list_naip_products/test_list_naip_products.py +++ b/ops/list_naip_products/test_list_naip_products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/list_sentinel1_products/list_sentinel1_products_pc.py b/ops/list_sentinel1_products/list_sentinel1_products_pc.py index d37b1ccb..70377f96 100644 --- a/ops/list_sentinel1_products/list_sentinel1_products_pc.py +++ b/ops/list_sentinel1_products/list_sentinel1_products_pc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging from typing import Dict, List diff --git a/ops/list_sentinel1_products/test_list_sentinel1.py b/ops/list_sentinel1_products/test_list_sentinel1.py index d0c2c9c8..c4f02cc7 100644 --- a/ops/list_sentinel1_products/test_list_sentinel1.py +++ b/ops/list_sentinel1_products/test_list_sentinel1.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import os from datetime import datetime, timezone diff --git a/ops/list_sentinel2_products/list_s2_pc.py b/ops/list_sentinel2_products/list_s2_pc.py index d4c7589c..78213abb 100644 --- a/ops/list_sentinel2_products/list_s2_pc.py +++ b/ops/list_sentinel2_products/list_s2_pc.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from concurrent.futures import ThreadPoolExecutor from typing import Dict, List diff --git a/ops/list_to_sequence/list_to_sequence.py b/ops/list_to_sequence/list_to_sequence.py index 845d924e..60753170 100644 --- a/ops/list_to_sequence/list_to_sequence.py +++ b/ops/list_to_sequence/list_to_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from datetime import datetime from typing import Any, Dict, List, Tuple diff --git a/ops/list_to_sequence/test_list_to_sequence.py b/ops/list_to_sequence/test_list_to_sequence.py index 80c5add7..64d9f1dc 100644 --- a/ops/list_to_sequence/test_list_to_sequence.py +++ b/ops/list_to_sequence/test_list_to_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from datetime import datetime, timezone diff --git a/ops/match_raster_to_ref/match_raster_to_ref.py b/ops/match_raster_to_ref/match_raster_to_ref.py index be7fc269..6fda0506 100644 --- a/ops/match_raster_to_ref/match_raster_to_ref.py +++ b/ops/match_raster_to_ref/match_raster_to_ref.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + import logging from tempfile import TemporaryDirectory from typing import Dict diff --git a/ops/merge_cloud_masks/merge_cloud_masks.py b/ops/merge_cloud_masks/merge_cloud_masks.py index d7715a7a..c4f79ebf 100644 --- a/ops/merge_cloud_masks/merge_cloud_masks.py +++ b/ops/merge_cloud_masks/merge_cloud_masks.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import datetime import gc import mimetypes diff --git a/ops/merge_cloud_masks/merge_cloud_masks_simple.py b/ops/merge_cloud_masks/merge_cloud_masks_simple.py index 6d01f25e..7ea8bc96 100644 --- a/ops/merge_cloud_masks/merge_cloud_masks_simple.py +++ b/ops/merge_cloud_masks/merge_cloud_masks_simple.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict, List, Tuple diff --git a/ops/merge_geometries/merge_geometries.py b/ops/merge_geometries/merge_geometries.py index 09c88c29..ab32034f 100644 --- a/ops/merge_geometries/merge_geometries.py +++ b/ops/merge_geometries/merge_geometries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from enum import auto from typing import Dict, List, TypeVar diff --git a/ops/merge_geometries/test_merge_geometries.py b/ops/merge_geometries/test_merge_geometries.py index 38319436..c8f44c0b 100644 --- a/ops/merge_geometries/test_merge_geometries.py +++ b/ops/merge_geometries/test_merge_geometries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime diff --git a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py b/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py index 2c37e06c..abc0361e 100644 --- a/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py +++ b/ops/merge_geometry_and_time_range/merge_geometry_and_time_range.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from typing import Dict diff --git a/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py b/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py index d2551410..158b5453 100644 --- a/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py +++ b/ops/merge_geometry_and_time_range/test_merge_geometry_and_time_range.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime diff --git a/ops/merge_rasters/merge_rasters.py b/ops/merge_rasters/merge_rasters.py index 8ef352a0..0d385fa1 100644 --- a/ops/merge_rasters/merge_rasters.py +++ b/ops/merge_rasters/merge_rasters.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import mimetypes import os diff --git a/ops/merge_sentinel1_orbits/merge_sentinel1.py b/ops/merge_sentinel1_orbits/merge_sentinel1.py index bb350ef9..8d588b4e 100644 --- a/ops/merge_sentinel1_orbits/merge_sentinel1.py +++ b/ops/merge_sentinel1_orbits/merge_sentinel1.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Any, Dict, List, Tuple diff --git a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py b/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py index 5e1194be..244771fc 100644 --- a/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py +++ b/ops/merge_sentinel2_orbits/merge_sentinel2_orbits.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/minimum_samples/find_soil_sample_locations.py b/ops/minimum_samples/find_soil_sample_locations.py index 9eb75586..ecad43a0 100644 --- a/ops/minimum_samples/find_soil_sample_locations.py +++ b/ops/minimum_samples/find_soil_sample_locations.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Dict, List, Tuple, cast diff --git a/ops/minimum_samples/test_soil_sample_heatmap.py b/ops/minimum_samples/test_soil_sample_heatmap.py index 1e6f35fa..aec1323a 100644 --- a/ops/minimum_samples/test_soil_sample_heatmap.py +++ b/ops/minimum_samples/test_soil_sample_heatmap.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import time from datetime import datetime diff --git a/ops/ordinal_trend_test/ordinal_trend_test.py b/ops/ordinal_trend_test/ordinal_trend_test.py index fb15b94e..29b9c2dd 100644 --- a/ops/ordinal_trend_test/ordinal_trend_test.py +++ b/ops/ordinal_trend_test/ordinal_trend_test.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime as dt from tempfile import TemporaryDirectory diff --git a/ops/ordinal_trend_test/test_ordinal_trend.py b/ops/ordinal_trend_test/test_ordinal_trend.py index b00aba62..b7ea1658 100644 --- a/ops/ordinal_trend_test/test_ordinal_trend.py +++ b/ops/ordinal_trend_test/test_ordinal_trend.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/pair_intersecting_rasters/pair_intersecting_rasters.py b/ops/pair_intersecting_rasters/pair_intersecting_rasters.py index 8d6ed1e2..b26291ec 100644 --- a/ops/pair_intersecting_rasters/pair_intersecting_rasters.py +++ b/ops/pair_intersecting_rasters/pair_intersecting_rasters.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List, Union from shapely import geometry as shpg diff --git a/ops/price_airbus_products/price_airbus.py b/ops/price_airbus_products/price_airbus.py index af8396b2..67ee4610 100644 --- a/ops/price_airbus_products/price_airbus.py +++ b/ops/price_airbus_products/price_airbus.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from tempfile import TemporaryDirectory from typing import Dict, List diff --git a/ops/protlearn/protlearn.py b/ops/protlearn/protlearn.py index 2d33561e..e4932873 100644 --- a/ops/protlearn/protlearn.py +++ b/ops/protlearn/protlearn.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/read_grib_forecast/read_grib_forecast.py b/ops/read_grib_forecast/read_grib_forecast.py index 46e4ff1c..ca5595a9 100644 --- a/ops/read_grib_forecast/read_grib_forecast.py +++ b/ops/read_grib_forecast/read_grib_forecast.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes import os from tempfile import TemporaryDirectory diff --git a/ops/recode_raster/recode_raster.py b/ops/recode_raster/recode_raster.py index ae487a6f..522611b8 100644 --- a/ops/recode_raster/recode_raster.py +++ b/ops/recode_raster/recode_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Dict, List diff --git a/ops/recode_raster/test_recode_raster.py b/ops/recode_raster/test_recode_raster.py index 9e59beba..07645d6f 100644 --- a/ops/recode_raster/test_recode_raster.py +++ b/ops/recode_raster/test_recode_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/remove_clouds/remove_clouds.py b/ops/remove_clouds/remove_clouds.py index 88aa6a08..6813c48c 100644 --- a/ops/remove_clouds/remove_clouds.py +++ b/ops/remove_clouds/remove_clouds.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # pyright: reportUnknownMemberType=false import logging import os diff --git a/ops/remove_clouds/test_remove_clouds.py b/ops/remove_clouds/test_remove_clouds.py index 13e67745..b66178c6 100644 --- a/ops/remove_clouds/test_remove_clouds.py +++ b/ops/remove_clouds/test_remove_clouds.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime, timezone from typing import Any, Dict diff --git a/ops/segment_anything/sam_inference.py b/ops/segment_anything/sam_inference.py index f6f13020..2c749a4f 100644 --- a/ops/segment_anything/sam_inference.py +++ b/ops/segment_anything/sam_inference.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os from tempfile import TemporaryDirectory diff --git a/ops/segment_anything/test_sam_inference.py b/ops/segment_anything/test_sam_inference.py index a4bdd1a4..d99a85b4 100644 --- a/ops/segment_anything/test_sam_inference.py +++ b/ops/segment_anything/test_sam_inference.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/segment_anything_combine_masks/combine_sam_masks.py b/ops/segment_anything_combine_masks/combine_sam_masks.py index e457b98d..238ed20c 100644 --- a/ops/segment_anything_combine_masks/combine_sam_masks.py +++ b/ops/segment_anything_combine_masks/combine_sam_masks.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Dict, List, Tuple diff --git a/ops/segment_anything_combine_masks/test_combine_sam_masks.py b/ops/segment_anything_combine_masks/test_combine_sam_masks.py index febd5f62..43f76bb2 100644 --- a/ops/segment_anything_combine_masks/test_combine_sam_masks.py +++ b/ops/segment_anything_combine_masks/test_combine_sam_masks.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime from tempfile import TemporaryDirectory diff --git a/ops/segment_driveway/segment_driveway.py b/ops/segment_driveway/segment_driveway.py index 0444b4f0..e5374a5d 100644 --- a/ops/segment_driveway/segment_driveway.py +++ b/ops/segment_driveway/segment_driveway.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, Tuple diff --git a/ops/select_necessary_coverage_items/filter_items.py b/ops/select_necessary_coverage_items/filter_items.py index e11e5541..44f4e722 100644 --- a/ops/select_necessary_coverage_items/filter_items.py +++ b/ops/select_necessary_coverage_items/filter_items.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Selects a (locally?) minimum subset of items that covers the desired input geometry (if suchs subset exists) for each timestamp. diff --git a/ops/select_necessary_coverage_items/test_filter.py b/ops/select_necessary_coverage_items/test_filter.py index 07bf5c2d..ca3d664c 100644 --- a/ops/select_necessary_coverage_items/test_filter.py +++ b/ops/select_necessary_coverage_items/test_filter.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import List, cast diff --git a/ops/select_sequence/select_sequence.py b/ops/select_sequence/select_sequence.py index 3517f2b6..af91056c 100644 --- a/ops/select_sequence/select_sequence.py +++ b/ops/select_sequence/select_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from functools import partial from typing import Dict, List, Union diff --git a/ops/split_sequence/split_sequence.py b/ops/split_sequence/split_sequence.py index df38ec73..80696380 100644 --- a/ops/split_sequence/split_sequence.py +++ b/ops/split_sequence/split_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List from vibe_core.data.core_types import gen_guid diff --git a/ops/split_sequence/test_split_sequence.py b/ops/split_sequence/test_split_sequence.py index 7828d73a..a2370c7c 100644 --- a/ops/split_sequence/test_split_sequence.py +++ b/ops/split_sequence/test_split_sequence.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import Any, Dict diff --git a/ops/stack_landsat/stack_landsat.py b/ops/stack_landsat/stack_landsat.py index fdbf0981..aa331a4c 100644 --- a/ops/stack_landsat/stack_landsat.py +++ b/ops/stack_landsat/stack_landsat.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Dict, Tuple diff --git a/ops/stack_sentinel2_bands/stack_sentinel2_bands.py b/ops/stack_sentinel2_bands/stack_sentinel2_bands.py index 0ca054bb..265e4e58 100644 --- a/ops/stack_sentinel2_bands/stack_sentinel2_bands.py +++ b/ops/stack_sentinel2_bands/stack_sentinel2_bands.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import mimetypes import os diff --git a/ops/summarize_raster/raster_summary.py b/ops/summarize_raster/raster_summary.py index c029614b..c741ef0a 100644 --- a/ops/summarize_raster/raster_summary.py +++ b/ops/summarize_raster/raster_summary.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from tempfile import TemporaryDirectory from typing import Any, Dict, Optional diff --git a/ops/threshold_raster/threshold_raster.py b/ops/threshold_raster/threshold_raster.py index 9e6e2f35..91f84b19 100644 --- a/ops/threshold_raster/threshold_raster.py +++ b/ops/threshold_raster/threshold_raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from tempfile import TemporaryDirectory from typing import Dict, Optional, cast diff --git a/ops/tile_sentinel1/tile_sentinel1.py b/ops/tile_sentinel1/tile_sentinel1.py index 60f00a54..8e14ca55 100644 --- a/ops/tile_sentinel1/tile_sentinel1.py +++ b/ops/tile_sentinel1/tile_sentinel1.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import logging from tempfile import TemporaryDirectory diff --git a/ops/unpack_refs/unpack_refs.py b/ops/unpack_refs/unpack_refs.py index 00875d6c..33c1db1a 100644 --- a/ops/unpack_refs/unpack_refs.py +++ b/ops/unpack_refs/unpack_refs.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List from vibe_core.data import ExternalReference, ExternalReferenceList, gen_guid diff --git a/ops/weed_detection/weed_detection.py b/ops/weed_detection/weed_detection.py index f686c4db..1434abc1 100644 --- a/ops/weed_detection/weed_detection.py +++ b/ops/weed_detection/weed_detection.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from dataclasses import dataclass from enum import auto diff --git a/resources/docker/Dockerfile-api_orchestrator b/resources/docker/Dockerfile-api_orchestrator index fd4bb246..cc9bca7b 100644 --- a/resources/docker/Dockerfile-api_orchestrator +++ b/resources/docker/Dockerfile-api_orchestrator @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 COPY src /app/src diff --git a/resources/docker/Dockerfile-cache b/resources/docker/Dockerfile-cache index 322fb693..bb975c3d 100644 --- a/resources/docker/Dockerfile-cache +++ b/resources/docker/Dockerfile-cache @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + FROM mcr.microsoft.com/farmai/terravibes/services-base:12380 COPY src /app/src diff --git a/resources/docker/Dockerfile-dev b/resources/docker/Dockerfile-dev index 58316bb8..7567c838 100644 --- a/resources/docker/Dockerfile-dev +++ b/resources/docker/Dockerfile-dev @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + ARG BASE_IMAGE FROM $BASE_IMAGE COPY resources/envs/dev.yaml /tmp/dev.yaml diff --git a/resources/docker/Dockerfile-devcontainer b/resources/docker/Dockerfile-devcontainer index 464b9e9e..a1259414 100644 --- a/resources/docker/Dockerfile-devcontainer +++ b/resources/docker/Dockerfile-devcontainer @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 ARG USERNAME=vscode diff --git a/resources/docker/Dockerfile-services-base b/resources/docker/Dockerfile-services-base index d17675d7..f0368c21 100644 --- a/resources/docker/Dockerfile-services-base +++ b/resources/docker/Dockerfile-services-base @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder diff --git a/resources/docker/Dockerfile-worker b/resources/docker/Dockerfile-worker index b9177152..bfaca499 100644 --- a/resources/docker/Dockerfile-worker +++ b/resources/docker/Dockerfile-worker @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ FROM mcr.microsoft.com/farmai/terravibes/worker-base:12380 COPY src /app/src diff --git a/resources/docker/Dockerfile-worker-base b/resources/docker/Dockerfile-worker-base index b235e501..7e95ddd7 100644 --- a/resources/docker/Dockerfile-worker-base +++ b/resources/docker/Dockerfile-worker-base @@ -1,4 +1,6 @@ -# syntax=docker/dockerfile:1 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS builder RUN tdnf update -y || echo "Not updating anything..." && tdnf install -y \ diff --git a/resources/vm/setup_farmvibes_ai_vm.sh b/resources/vm/setup_farmvibes_ai_vm.sh index a6ba85de..762fa3b6 100755 --- a/resources/vm/setup_farmvibes_ai_vm.sh +++ b/resources/vm/setup_farmvibes_ai_vm.sh @@ -1,4 +1,7 @@ #!/bin/bash +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # Update apt sudo apt update diff --git a/scripts/local-k8s-diagnostics.sh b/scripts/local-k8s-diagnostics.sh index aca46057..9c0c25cd 100644 --- a/scripts/local-k8s-diagnostics.sh +++ b/scripts/local-k8s-diagnostics.sh @@ -1,4 +1,7 @@ #!/bin/sh +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + PATH=$PATH:~/.config/farmvibes-ai diff --git a/scripts/setup_python_develop_env.sh b/scripts/setup_python_develop_env.sh index 9785bbfd..0cb9c36e 100644 --- a/scripts/setup_python_develop_env.sh +++ b/scripts/setup_python_develop_env.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + SCRIPTFILE=$(readlink -f "$0") SCRIPTPATH=$(dirname "$SCRIPTFILE") diff --git a/src/tests/__init__.py b/src/tests/__init__.py index e69de29b..b7c52582 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ diff --git a/src/tests/benchmark/test_spaceeye_ops.py b/src/tests/benchmark/test_spaceeye_ops.py index d4cb92db..0dbc68f8 100644 --- a/src/tests/benchmark/test_spaceeye_ops.py +++ b/src/tests/benchmark/test_spaceeye_ops.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import time from typing import List, cast diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 927a27c6..d447dc1f 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import pytest from vibe_dev.testing import anyio_backend # type: ignore # noqa diff --git a/src/tests/test_notebooks.py b/src/tests/test_notebooks.py index c2d254fb..259cc4cf 100644 --- a/src/tests/test_notebooks.py +++ b/src/tests/test_notebooks.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import os from typing import List diff --git a/src/tests/test_op_workflows_integration.py b/src/tests/test_op_workflows_integration.py index f00d58a1..14a63f7d 100644 --- a/src/tests/test_op_workflows_integration.py +++ b/src/tests/test_op_workflows_integration.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import shutil import tempfile diff --git a/src/tests/test_ops_building.py b/src/tests/test_ops_building.py index 989d7889..602a299c 100644 --- a/src/tests/test_ops_building.py +++ b/src/tests/test_ops_building.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from pathlib import Path from typing import List diff --git a/src/tests/test_rest_api.py b/src/tests/test_rest_api.py index 77548ebb..cb8fd2ca 100644 --- a/src/tests/test_rest_api.py +++ b/src/tests/test_rest_api.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from dataclasses import asdict from typing import Any, Dict, List, Optional, Tuple, Union, cast from unittest.mock import MagicMock, patch diff --git a/src/tests/test_rest_api_client_integration.py b/src/tests/test_rest_api_client_integration.py index 6d7cae53..329d39d2 100644 --- a/src/tests/test_rest_api_client_integration.py +++ b/src/tests/test_rest_api_client_integration.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from dataclasses import asdict from datetime import datetime from os.path import join as j diff --git a/src/tests/test_subprocess_client.py b/src/tests/test_subprocess_client.py index 1d1ce342..dd2966cb 100644 --- a/src/tests/test_subprocess_client.py +++ b/src/tests/test_subprocess_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import Tuple diff --git a/src/tests/workflows_integration/__init__.py b/src/tests/workflows_integration/__init__.py index e69de29b..b7c52582 100644 --- a/src/tests/workflows_integration/__init__.py +++ b/src/tests/workflows_integration/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/tests/workflows_integration/test_helloworld_integration.py b/src/tests/workflows_integration/test_helloworld_integration.py index cca9ecc6..2681f8b2 100644 --- a/src/tests/workflows_integration/test_helloworld_integration.py +++ b/src/tests/workflows_integration/test_helloworld_integration.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from datetime import datetime, timezone from typing import Any, Dict, List, Optional diff --git a/src/tests_local_cluster/test_cluster_integration.py b/src/tests_local_cluster/test_cluster_integration.py index 753361b8..06bdc88a 100644 --- a/src/tests_local_cluster/test_cluster_integration.py +++ b/src/tests_local_cluster/test_cluster_integration.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import codecs import getpass import os diff --git a/src/vibe_agent/setup.py b/src/vibe_agent/setup.py index 7291bc44..35d93eaa 100644 --- a/src/vibe_agent/setup.py +++ b/src/vibe_agent/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_agent/tests/conftest.py b/src/vibe_agent/tests/conftest.py index f045d7c5..cd899eec 100644 --- a/src/vibe_agent/tests/conftest.py +++ b/src/vibe_agent/tests/conftest.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # flake8: noqa import os import uuid diff --git a/src/vibe_agent/tests/ops/test_dependencies_integration.py b/src/vibe_agent/tests/ops/test_dependencies_integration.py index 7e29f26b..5dfbaf4d 100644 --- a/src/vibe_agent/tests/ops/test_dependencies_integration.py +++ b/src/vibe_agent/tests/ops/test_dependencies_integration.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import pytest from vibe_agent.ops import EntryPointDict, OperationDependencyResolver, OperationSpec diff --git a/src/vibe_agent/tests/ops/test_op_cache_builder.py b/src/vibe_agent/tests/ops/test_op_cache_builder.py index 3b66f28c..34079f46 100644 --- a/src/vibe_agent/tests/ops/test_op_cache_builder.py +++ b/src/vibe_agent/tests/ops/test_op_cache_builder.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import datetime import random from dataclasses import dataclass diff --git a/src/vibe_agent/tests/ops/test_op_parser.py b/src/vibe_agent/tests/ops/test_op_parser.py index 278c1e7b..c67805fa 100644 --- a/src/vibe_agent/tests/ops/test_op_parser.py +++ b/src/vibe_agent/tests/ops/test_op_parser.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from typing import Any, Dict diff --git a/src/vibe_agent/tests/ops/test_operation.py b/src/vibe_agent/tests/ops/test_operation.py index f472c708..1ff91519 100644 --- a/src/vibe_agent/tests/ops/test_operation.py +++ b/src/vibe_agent/tests/ops/test_operation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import Any, Callable diff --git a/src/vibe_agent/tests/test_eywa_asset.py b/src/vibe_agent/tests/test_asset_vibe.py similarity index 93% rename from src/vibe_agent/tests/test_eywa_asset.py rename to src/vibe_agent/tests/test_asset_vibe.py index 0759905b..0c563446 100644 --- a/src/vibe_agent/tests/test_eywa_asset.py +++ b/src/vibe_agent/tests/test_asset_vibe.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import mimetypes from pathlib import Path diff --git a/src/vibe_agent/tests/test_cache_metadata_store.py b/src/vibe_agent/tests/test_cache_metadata_store.py index 92d08f68..4586dc52 100644 --- a/src/vibe_agent/tests/test_cache_metadata_store.py +++ b/src/vibe_agent/tests/test_cache_metadata_store.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import asyncio import uuid from dataclasses import asdict diff --git a/src/vibe_agent/tests/test_local_asset_manager.py b/src/vibe_agent/tests/test_local_asset_manager.py index 3ec48dac..6bca971c 100644 --- a/src/vibe_agent/tests/test_local_asset_manager.py +++ b/src/vibe_agent/tests/test_local_asset_manager.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from tempfile import TemporaryDirectory from unittest.mock import MagicMock, Mock, patch diff --git a/src/vibe_agent/tests/test_storage.py b/src/vibe_agent/tests/test_storage.py index 0574b67a..273521c8 100644 --- a/src/vibe_agent/tests/test_storage.py +++ b/src/vibe_agent/tests/test_storage.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime, timezone from typing import Any, Dict diff --git a/src/vibe_agent/tests/test_uri_handling.py b/src/vibe_agent/tests/test_uri_handling.py index bbe97e5e..c644824b 100644 --- a/src/vibe_agent/tests/test_uri_handling.py +++ b/src/vibe_agent/tests/test_uri_handling.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from pathlib import Path diff --git a/src/vibe_agent/vibe_agent/__init__.py b/src/vibe_agent/vibe_agent/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_agent/vibe_agent/__init__.py +++ b/src/vibe_agent/vibe_agent/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_agent/vibe_agent/agent_config.py b/src/vibe_agent/vibe_agent/agent_config.py index 782c154f..3a3f1d69 100644 --- a/src/vibe_agent/vibe_agent/agent_config.py +++ b/src/vibe_agent/vibe_agent/agent_config.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging import os diff --git a/src/vibe_agent/vibe_agent/cache.py b/src/vibe_agent/vibe_agent/cache.py index cbc42bfb..7a49ece1 100644 --- a/src/vibe_agent/vibe_agent/cache.py +++ b/src/vibe_agent/vibe_agent/cache.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import logging import os diff --git a/src/vibe_agent/vibe_agent/cache_metadata_store.py b/src/vibe_agent/vibe_agent/cache_metadata_store.py index 9c4f186c..cf8565ca 100644 --- a/src/vibe_agent/vibe_agent/cache_metadata_store.py +++ b/src/vibe_agent/vibe_agent/cache_metadata_store.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from typing import Dict, Protocol, Set diff --git a/src/vibe_agent/vibe_agent/cache_metadata_store_client.py b/src/vibe_agent/vibe_agent/cache_metadata_store_client.py index 84522e07..9dca0ec2 100644 --- a/src/vibe_agent/vibe_agent/cache_metadata_store_client.py +++ b/src/vibe_agent/vibe_agent/cache_metadata_store_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from vibe_common.constants import DATA_OPS_INVOKE_URL_TEMPLATE diff --git a/src/vibe_agent/vibe_agent/data_ops.py b/src/vibe_agent/vibe_agent/data_ops.py index cc1b5e31..789615ff 100644 --- a/src/vibe_agent/vibe_agent/data_ops.py +++ b/src/vibe_agent/vibe_agent/data_ops.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import logging from typing import List, Optional, Set, cast diff --git a/src/vibe_agent/vibe_agent/launch_cache.py b/src/vibe_agent/vibe_agent/launch_cache.py index 73623dee..3336b546 100644 --- a/src/vibe_agent/vibe_agent/launch_cache.py +++ b/src/vibe_agent/vibe_agent/launch_cache.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import asyncio from typing import Any diff --git a/src/vibe_agent/vibe_agent/launch_data_ops.py b/src/vibe_agent/vibe_agent/launch_data_ops.py index 4083e2a8..7d126cad 100644 --- a/src/vibe_agent/vibe_agent/launch_data_ops.py +++ b/src/vibe_agent/vibe_agent/launch_data_ops.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio from typing import Any diff --git a/src/vibe_agent/vibe_agent/launch_worker.py b/src/vibe_agent/vibe_agent/launch_worker.py index fa9b7d19..54ad3385 100644 --- a/src/vibe_agent/vibe_agent/launch_worker.py +++ b/src/vibe_agent/vibe_agent/launch_worker.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import signal from multiprocessing import set_start_method diff --git a/src/vibe_agent/vibe_agent/ops.py b/src/vibe_agent/vibe_agent/ops.py index ae652dcd..3be691e4 100644 --- a/src/vibe_agent/vibe_agent/ops.py +++ b/src/vibe_agent/vibe_agent/ops.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import importlib.util import inspect import logging diff --git a/src/vibe_agent/vibe_agent/ops_helper.py b/src/vibe_agent/vibe_agent/ops_helper.py index 753a9ec4..eac939a9 100644 --- a/src/vibe_agent/vibe_agent/ops_helper.py +++ b/src/vibe_agent/vibe_agent/ops_helper.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from vibe_core.data.core_types import OpIOType from vibe_core.data.utils import deserialize_stac, serialize_stac diff --git a/src/vibe_agent/vibe_agent/storage/__init__.py b/src/vibe_agent/vibe_agent/storage/__init__.py index 90e4562f..a366373d 100644 --- a/src/vibe_agent/vibe_agent/storage/__init__.py +++ b/src/vibe_agent/vibe_agent/storage/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from .asset_management import BlobAssetManagerConfig, LocalFileAssetManagerConfig from .local_storage import LocalStorage, LocalStorageConfig from .remote_storage import CosmosStorage, CosmosStorageConfig diff --git a/src/vibe_agent/vibe_agent/storage/asset_management.py b/src/vibe_agent/vibe_agent/storage/asset_management.py index c118f591..821d2880 100644 --- a/src/vibe_agent/vibe_agent/storage/asset_management.py +++ b/src/vibe_agent/vibe_agent/storage/asset_management.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os import shutil diff --git a/src/vibe_agent/vibe_agent/storage/file_upload.py b/src/vibe_agent/vibe_agent/storage/file_upload.py index 7d07fc66..07ee46ae 100644 --- a/src/vibe_agent/vibe_agent/storage/file_upload.py +++ b/src/vibe_agent/vibe_agent/storage/file_upload.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any from azure.storage.blob import BlobClient diff --git a/src/vibe_agent/vibe_agent/storage/local_storage.py b/src/vibe_agent/vibe_agent/storage/local_storage.py index 9178687f..ec567777 100644 --- a/src/vibe_agent/vibe_agent/storage/local_storage.py +++ b/src/vibe_agent/vibe_agent/storage/local_storage.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import logging import os diff --git a/src/vibe_agent/vibe_agent/storage/remote_storage.py b/src/vibe_agent/vibe_agent/storage/remote_storage.py index 76aaf973..b94dfd4e 100644 --- a/src/vibe_agent/vibe_agent/storage/remote_storage.py +++ b/src/vibe_agent/vibe_agent/storage/remote_storage.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import logging from dataclasses import asdict, dataclass, fields from functools import lru_cache diff --git a/src/vibe_agent/vibe_agent/storage/storage.py b/src/vibe_agent/vibe_agent/storage/storage.py index 10022ebc..3be0bd2c 100644 --- a/src/vibe_agent/vibe_agent/storage/storage.py +++ b/src/vibe_agent/vibe_agent/storage/storage.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Storage module for TerraVibes. Helps store, index, retrieve, and catalog geospatial knowledge that an instance of TerraVibes contains. diff --git a/src/vibe_agent/vibe_agent/worker.py b/src/vibe_agent/vibe_agent/worker.py index 7b45fc4c..0a374e65 100644 --- a/src/vibe_agent/vibe_agent/worker.py +++ b/src/vibe_agent/vibe_agent/worker.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import concurrent.futures import json diff --git a/src/vibe_common/setup.py b/src/vibe_common/setup.py index cc805405..185a55fb 100644 --- a/src/vibe_common/setup.py +++ b/src/vibe_common/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_common/tests/conftest.py b/src/vibe_common/tests/conftest.py index 20b9d94e..440204e3 100644 --- a/src/vibe_common/tests/conftest.py +++ b/src/vibe_common/tests/conftest.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from vibe_dev.testing import anyio_backend from vibe_dev.testing.fake_workflows_fixtures import fake_ops_dir, fake_workflows_dir from vibe_dev.testing.workflow_fixtures import ( diff --git a/src/vibe_common/tests/test_input_handlers.py b/src/vibe_common/tests/test_input_handlers.py index 67e0527e..b8f57d05 100644 --- a/src/vibe_common/tests/test_input_handlers.py +++ b/src/vibe_common/tests/test_input_handlers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + from datetime import datetime, timedelta, timezone from typing import Any, Dict from unittest.mock import Mock, patch diff --git a/src/vibe_common/tests/test_messaging.py b/src/vibe_common/tests/test_messaging.py index 4d7cbf35..6445c0a9 100644 --- a/src/vibe_common/tests/test_messaging.py +++ b/src/vibe_common/tests/test_messaging.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import sys import traceback as tb diff --git a/src/vibe_common/tests/test_statestore.py b/src/vibe_common/tests/test_statestore.py index 80f39786..ef004cee 100644 --- a/src/vibe_common/tests/test_statestore.py +++ b/src/vibe_common/tests/test_statestore.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any import pytest diff --git a/src/vibe_common/tests/test_vibe_dapr_client.py b/src/vibe_common/tests/test_vibe_dapr_client.py index c9c7e68c..b84866b8 100644 --- a/src/vibe_common/tests/test_vibe_dapr_client.py +++ b/src/vibe_common/tests/test_vibe_dapr_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Any diff --git a/src/vibe_common/vibe_common/__init__.py b/src/vibe_common/vibe_common/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_common/vibe_common/__init__.py +++ b/src/vibe_common/vibe_common/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_common/vibe_common/constants.py b/src/vibe_common/vibe_common/constants.py index 2aa0e480..3ea3828a 100644 --- a/src/vibe_common/vibe_common/constants.py +++ b/src/vibe_common/vibe_common/constants.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from typing import Dict, Final, List, Tuple, cast diff --git a/src/vibe_common/vibe_common/dapr.py b/src/vibe_common/vibe_common/dapr.py index 781728bf..31ecbf62 100644 --- a/src/vibe_common/vibe_common/dapr.py +++ b/src/vibe_common/vibe_common/dapr.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import logging from functools import partial, wraps diff --git a/src/vibe_common/vibe_common/dropdapr.py b/src/vibe_common/vibe_common/dropdapr.py index 5e8926f8..09d749ed 100644 --- a/src/vibe_common/vibe_common/dropdapr.py +++ b/src/vibe_common/vibe_common/dropdapr.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ dropdapr - A drop-in replacement for dapr-ext-grpc subscribe using FastAPI. """ diff --git a/src/vibe_common/vibe_common/input_handlers.py b/src/vibe_common/vibe_common/input_handlers.py index 66a873ba..710b29cd 100644 --- a/src/vibe_common/vibe_common/input_handlers.py +++ b/src/vibe_common/vibe_common/input_handlers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Any, Dict diff --git a/src/vibe_common/vibe_common/messaging.py b/src/vibe_common/vibe_common/messaging.py index 237ec076..e7025b5f 100644 --- a/src/vibe_common/vibe_common/messaging.py +++ b/src/vibe_common/vibe_common/messaging.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import logging import sys diff --git a/src/vibe_common/vibe_common/schemas.py b/src/vibe_common/vibe_common/schemas.py index c3fa06b1..afc20070 100644 --- a/src/vibe_common/vibe_common/schemas.py +++ b/src/vibe_common/vibe_common/schemas.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os from copy import deepcopy from dataclasses import field, fields diff --git a/src/vibe_common/vibe_common/secret_provider.py b/src/vibe_common/vibe_common/secret_provider.py index 798d294c..d48f1374 100644 --- a/src/vibe_common/vibe_common/secret_provider.py +++ b/src/vibe_common/vibe_common/secret_provider.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import re import time diff --git a/src/vibe_common/vibe_common/statestore.py b/src/vibe_common/vibe_common/statestore.py index 21a1ddc3..778eae0a 100644 --- a/src/vibe_common/vibe_common/statestore.py +++ b/src/vibe_common/vibe_common/statestore.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # -*- coding: utf-8 -*- import logging diff --git a/src/vibe_common/vibe_common/telemetry.py b/src/vibe_common/vibe_common/telemetry.py index b902a573..e2ae147e 100644 --- a/src/vibe_common/vibe_common/telemetry.py +++ b/src/vibe_common/vibe_common/telemetry.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import inspect import logging from functools import wraps diff --git a/src/vibe_common/vibe_common/tokens.py b/src/vibe_common/vibe_common/tokens.py index f4a6595b..905ad863 100644 --- a/src/vibe_common/vibe_common/tokens.py +++ b/src/vibe_common/vibe_common/tokens.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from abc import ABC, abstractmethod from datetime import datetime, timedelta diff --git a/src/vibe_common/vibe_common/vibe_dapr_client.py b/src/vibe_common/vibe_common/vibe_dapr_client.py index 66551f83..9e2aca5e 100644 --- a/src/vibe_common/vibe_common/vibe_dapr_client.py +++ b/src/vibe_common/vibe_common/vibe_dapr_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import json import logging from functools import partial diff --git a/src/vibe_common/vibe_common/workflow/__init__.py b/src/vibe_common/vibe_common/workflow/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_common/vibe_common/workflow/__init__.py +++ b/src/vibe_common/vibe_common/workflow/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/setup.py b/src/vibe_core/setup.py index 7f1a1763..fa5a68c9 100644 --- a/src/vibe_core/setup.py +++ b/src/vibe_core/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import setup if __name__ == "__main__": diff --git a/src/vibe_core/tests/test_register.py b/src/vibe_core/tests/test_register.py index b2b9f094..e02d938a 100644 --- a/src/vibe_core/tests/test_register.py +++ b/src/vibe_core/tests/test_register.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from vibe_core.data import DataVibe, data_registry diff --git a/src/vibe_core/tests/test_stac_converter.py b/src/vibe_core/tests/test_stac_converter.py index 23224363..4321d8c0 100644 --- a/src/vibe_core/tests/test_stac_converter.py +++ b/src/vibe_core/tests/test_stac_converter.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + # pyright: reportUnknownMemberType=false from dataclasses import dataclass diff --git a/src/vibe_core/tests/test_type_serialization.py b/src/vibe_core/tests/test_type_serialization.py index c37290fd..c4019fe4 100644 --- a/src/vibe_core/tests/test_type_serialization.py +++ b/src/vibe_core/tests/test_type_serialization.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import inspect import typing from datetime import datetime diff --git a/src/vibe_core/tests/test_utils.py b/src/vibe_core/tests/test_utils.py index c2301ec4..331dbbc0 100644 --- a/src/vibe_core/tests/test_utils.py +++ b/src/vibe_core/tests/test_utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, List, Tuple, cast diff --git a/src/vibe_core/vibe_core/__init__.py b/src/vibe_core/vibe_core/__init__.py index ffeb1cd5..3cd867d2 100644 --- a/src/vibe_core/vibe_core/__init__.py +++ b/src/vibe_core/vibe_core/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Core types and functions, as well as constants used throughout FarmVibes.AI.""" from .client import Client, FarmvibesAiClient diff --git a/src/vibe_core/vibe_core/admag_client.py b/src/vibe_core/vibe_core/admag_client.py index 62c4695f..2d84d701 100644 --- a/src/vibe_core/vibe_core/admag_client.py +++ b/src/vibe_core/vibe_core/admag_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Data types and supporting functions for interacting with Azure Data Manager for Agriculture.""" import json diff --git a/src/vibe_core/vibe_core/cli/__init__.py b/src/vibe_core/vibe_core/cli/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/cli/__init__.py +++ b/src/vibe_core/vibe_core/cli/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/cli/constants.py b/src/vibe_core/vibe_core/cli/constants.py index be6bdb8c..09d9651a 100644 --- a/src/vibe_core/vibe_core/cli/constants.py +++ b/src/vibe_core/vibe_core/cli/constants.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ DEFAULT_IMAGE_PREFIX = "farmai/terravibes/" DEFAULT_IMAGE_TAG = "dev" DEFAULT_REGISTRY_PATH = "mcr.microsoft.com" diff --git a/src/vibe_core/vibe_core/cli/help_descriptions.py b/src/vibe_core/vibe_core/cli/help_descriptions.py index 9d671d71..7ce72327 100644 --- a/src/vibe_core/vibe_core/cli/help_descriptions.py +++ b/src/vibe_core/vibe_core/cli/help_descriptions.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + ADD_ONNX_HELP = """\ Adds an Onnx model to the TerraVibes cluster. After being added, one can use this \ model in the ops compute_onnx and compute_onnx_from_sequence (by setting the parameter model_file \ diff --git a/src/vibe_core/vibe_core/cli/helper.py b/src/vibe_core/vibe_core/cli/helper.py index 51f7f6b9..f2d6b02e 100644 --- a/src/vibe_core/vibe_core/cli/helper.py +++ b/src/vibe_core/vibe_core/cli/helper.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import locale import os import socket diff --git a/src/vibe_core/vibe_core/cli/local.py b/src/vibe_core/vibe_core/cli/local.py index 85fb0c30..2677df45 100644 --- a/src/vibe_core/vibe_core/cli/local.py +++ b/src/vibe_core/vibe_core/cli/local.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import argparse import codecs import os diff --git a/src/vibe_core/vibe_core/cli/logging.py b/src/vibe_core/vibe_core/cli/logging.py index b5ef0c63..e6a44a95 100644 --- a/src/vibe_core/vibe_core/cli/logging.py +++ b/src/vibe_core/vibe_core/cli/logging.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import logging.handlers import pathlib diff --git a/src/vibe_core/vibe_core/cli/main.py b/src/vibe_core/vibe_core/cli/main.py index 92031069..5b889e4f 100644 --- a/src/vibe_core/vibe_core/cli/main.py +++ b/src/vibe_core/vibe_core/cli/main.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + import argparse import sys diff --git a/src/vibe_core/vibe_core/cli/osartifacts.py b/src/vibe_core/vibe_core/cli/osartifacts.py index c5f2bc87..eafaa883 100644 --- a/src/vibe_core/vibe_core/cli/osartifacts.py +++ b/src/vibe_core/vibe_core/cli/osartifacts.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import pathlib import platform diff --git a/src/vibe_core/vibe_core/cli/parsers.py b/src/vibe_core/vibe_core/cli/parsers.py index c10ff46e..13fb0f30 100644 --- a/src/vibe_core/vibe_core/cli/parsers.py +++ b/src/vibe_core/vibe_core/cli/parsers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import argparse import getpass import os diff --git a/src/vibe_core/vibe_core/cli/remote.py b/src/vibe_core/vibe_core/cli/remote.py index 4b6644d5..5ea44d0c 100644 --- a/src/vibe_core/vibe_core/cli/remote.py +++ b/src/vibe_core/vibe_core/cli/remote.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import argparse import os from typing import Optional diff --git a/src/vibe_core/vibe_core/cli/wrappers.py b/src/vibe_core/vibe_core/cli/wrappers.py index 6c82722a..6deaf4a6 100644 --- a/src/vibe_core/vibe_core/cli/wrappers.py +++ b/src/vibe_core/vibe_core/cli/wrappers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib import json import os diff --git a/src/vibe_core/vibe_core/client.py b/src/vibe_core/vibe_core/client.py index 56ac95b1..f6c2497f 100644 --- a/src/vibe_core/vibe_core/client.py +++ b/src/vibe_core/vibe_core/client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """FarmVibes.AI client. 
This module provides a client for the FarmVibes.AI service, which allows users to interact with the diff --git a/src/vibe_core/vibe_core/data/__init__.py b/src/vibe_core/vibe_core/data/__init__.py index 5a1e78da..ecacf110 100644 --- a/src/vibe_core/vibe_core/data/__init__.py +++ b/src/vibe_core/vibe_core/data/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Core data model for FarmVibes.AI.""" from .airbus import AirbusPrice, AirbusProduct, AirbusRaster diff --git a/src/vibe_core/vibe_core/data/airbus.py b/src/vibe_core/vibe_core/data/airbus.py index e3e618d8..046a8d4f 100644 --- a/src/vibe_core/vibe_core/data/airbus.py +++ b/src/vibe_core/vibe_core/data/airbus.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """AirBus data types.""" from dataclasses import dataclass diff --git a/src/vibe_core/vibe_core/data/core_types.py b/src/vibe_core/vibe_core/data/core_types.py index da6b4fa2..231fbed3 100644 --- a/src/vibe_core/vibe_core/data/core_types.py +++ b/src/vibe_core/vibe_core/data/core_types.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Core data classes, functions, and constants of FarmVibes.AI.""" import hashlib diff --git a/src/vibe_core/vibe_core/data/data_registry.py b/src/vibe_core/vibe_core/data/data_registry.py index a55c1fcd..eabc995a 100644 --- a/src/vibe_core/vibe_core/data/data_registry.py +++ b/src/vibe_core/vibe_core/data/data_registry.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Data registry types and functions used in FarmVibes.AI.""" import warnings diff --git a/src/vibe_core/vibe_core/data/farm.py b/src/vibe_core/vibe_core/data/farm.py index c5c46881..e0c63aae 100644 --- a/src/vibe_core/vibe_core/data/farm.py +++ b/src/vibe_core/vibe_core/data/farm.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + """Azure Data Manager for Agriculture (ADMA) data types.""" from dataclasses import dataclass diff --git a/src/vibe_core/vibe_core/data/json_converter.py b/src/vibe_core/vibe_core/data/json_converter.py index 4729dfde..9032f58f 100644 --- a/src/vibe_core/vibe_core/data/json_converter.py +++ b/src/vibe_core/vibe_core/data/json_converter.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """JSON serialization/deserialization utilities.""" import json diff --git a/src/vibe_core/vibe_core/data/products.py b/src/vibe_core/vibe_core/data/products.py index 231bc5ec..3683ba33 100644 --- a/src/vibe_core/vibe_core/data/products.py +++ b/src/vibe_core/vibe_core/data/products.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Data type and functions definitions related to the products supported in FarmVibes.AI.""" import mimetypes diff --git a/src/vibe_core/vibe_core/data/rasters.py b/src/vibe_core/vibe_core/data/rasters.py index 131f55f1..d4ee84ca 100644 --- a/src/vibe_core/vibe_core/data/rasters.py +++ b/src/vibe_core/vibe_core/data/rasters.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Data types, constants, and supporting functions for manipulating rasters in FarmVibes.AI.""" from dataclasses import dataclass, field diff --git a/src/vibe_core/vibe_core/data/sentinel.py b/src/vibe_core/vibe_core/data/sentinel.py index 7f7bc40e..4b7459fe 100644 --- a/src/vibe_core/vibe_core/data/sentinel.py +++ b/src/vibe_core/vibe_core/data/sentinel.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ """Data types and supporting functions for Sentinel data in FarmVibes.AI.""" import mimetypes diff --git a/src/vibe_core/vibe_core/data/utils.py b/src/vibe_core/vibe_core/data/utils.py index a4423853..ba32aa73 100644 --- a/src/vibe_core/vibe_core/data/utils.py +++ b/src/vibe_core/vibe_core/data/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Utilities for interacting with STAC items and serialization/deserialization.""" import json diff --git a/src/vibe_core/vibe_core/data/weather.py b/src/vibe_core/vibe_core/data/weather.py index e2fe5296..df2b8d30 100644 --- a/src/vibe_core/vibe_core/data/weather.py +++ b/src/vibe_core/vibe_core/data/weather.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Weather data types and function definitions.""" import hashlib diff --git a/src/vibe_core/vibe_core/datamodel.py b/src/vibe_core/vibe_core/datamodel.py index 3fd863fa..b9c0a0d9 100644 --- a/src/vibe_core/vibe_core/datamodel.py +++ b/src/vibe_core/vibe_core/datamodel.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Data model classes definition used throughout FarmVibes.AI.""" import codecs diff --git a/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py b/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py index b52ecc17..5211ed1c 100644 --- a/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py +++ b/src/vibe_core/vibe_core/farmvibes_ai_hello_world.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from datetime import datetime, timezone diff --git a/src/vibe_core/vibe_core/file_downloader.py b/src/vibe_core/vibe_core/file_downloader.py index 651d7f92..9816fc21 100644 --- a/src/vibe_core/vibe_core/file_downloader.py +++ b/src/vibe_core/vibe_core/file_downloader.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ """File downloader utility methods and classes.""" import logging diff --git a/src/vibe_core/vibe_core/file_utils.py b/src/vibe_core/vibe_core/file_utils.py index e5569388..c190450c 100644 --- a/src/vibe_core/vibe_core/file_utils.py +++ b/src/vibe_core/vibe_core/file_utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Utility functions for working with files.""" import os diff --git a/src/vibe_core/vibe_core/logconfig.py b/src/vibe_core/vibe_core/logconfig.py index 1e2eaa50..96c7a318 100644 --- a/src/vibe_core/vibe_core/logconfig.py +++ b/src/vibe_core/vibe_core/logconfig.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Utility functions for configuring logging.""" import json diff --git a/src/vibe_core/vibe_core/monitor.py b/src/vibe_core/vibe_core/monitor.py index 1c7010c4..7c8ef56d 100644 --- a/src/vibe_core/vibe_core/monitor.py +++ b/src/vibe_core/vibe_core/monitor.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Classes and functions definition for monitoring workflow runs.""" from collections import Counter diff --git a/src/vibe_core/vibe_core/terraform/__init__.py b/src/vibe_core/vibe_core/terraform/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/__init__.py +++ b/src/vibe_core/vibe_core/terraform/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/aks/__init__.py b/src/vibe_core/vibe_core/terraform/aks/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/aks/__init__.py +++ b/src/vibe_core/vibe_core/terraform/aks/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ diff --git a/src/vibe_core/vibe_core/terraform/aks/main.tf b/src/vibe_core/vibe_core/terraform/aks/main.tf index 68b7dbc3..f3f11a1d 100644 --- a/src/vibe_core/vibe_core/terraform/aks/main.tf +++ b/src/vibe_core/vibe_core/terraform/aks/main.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + terraform { required_version = ">=0.12" } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py b/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py +++ b/src/vibe_core/vibe_core/terraform/aks/modules/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf index 0c9bca85..e096b5d0 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/azure_monitor.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "azurerm_log_analytics_workspace" "analyticsworkspace" { name = "${var.prefix}-analytics-workspace-${resource.random_string.name_suffix.result}" count = var.enable_telemetry ? 1 : 0 diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf index 309f552a..76b51151 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/cosmos.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ resource "azurerm_cosmosdb_account" "cosmos" { name = "${var.prefix}-cosmos-${resource.random_string.name_suffix.result}" location = var.location diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf index bb087fd1..10d56e0b 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/data.tf @@ -1 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + data "azurerm_client_config" "current" {} \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf index 8ac3a46c..747456f8 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/keyvault.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + data "http" "ip" { url = "https://ipv4.icanhazip.com" } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf index f2f33f0b..a7bec902 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/kubernetes.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + locals { default_node_pool_max_count = 3 } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf index 44a54738..879112f3 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/outputs.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ output "kubernetes_config_path" { value = local_file.kubeconfig.filename diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf index 826f447e..6877ac1b 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/providers.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf index 3227a708..81161cba 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/publicip.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "azurerm_public_ip" "publicip" { name = "${var.prefix}-${substr(sha256(var.resource_group_name), 0, 6)}-ip" resource_group_name = azurerm_kubernetes_cluster.kubernetes.node_resource_group diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf index c1b4455e..019c2a32 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/random.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "random_string" "name_suffix" { length = 5 special = false diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf index 425d2c8e..abde1fb7 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/resourcegroup.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + data "azurerm_resource_group" "resourcegroup" { name = var.resource_group_name } \ No newline at end of file diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf index 878098b8..8675cc90 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/storage.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "azurerm_storage_account" "storageaccount" { name = "storage${resource.random_string.name_suffix.result}" resource_group_name = var.resource_group_name diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf index da7cd61d..8f1cbbcd 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "location" { description = "Location of the resources." } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf b/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf index 3b4c9666..474ede52 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/infra/vnet.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ resource "azurerm_network_security_group" "aks-nsg" { name = "${var.prefix}-nsg" location = var.location diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf index cfe11999..f6e345b2 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/cert.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "helm_release" "letsencrypt" { name = "cert-manager" repository = "https://charts.jetstack.io" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf index 7c4a1c1e..b1c23dfa 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/dapr.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_namespace" "kubernetesdaprnamespace" { metadata { name = "dapr-system" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf index 5aee9b92..87195161 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/init.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_namespace" "kubernetesnamespace" { count = var.namespace == "default" ? 
0 : 1 metadata { diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf index b6d2953c..22fa2964 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/otel.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_config_map" "otel" { count = var.enable_telemetry ? 1 : 0 metadata { diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/outputs.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/outputs.tf index 2ffb7325..1b038ae5 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/outputs.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/outputs.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + output "dapr_sidecars_deployed" { value = true diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/persistentvolume.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/persistentvolume.tf index 698e124a..0e0f194f 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/persistentvolume.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/persistentvolume.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_storage_class" "csi_storage_class" { metadata { name = "csi-storage-class" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/providers.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/providers.tf index 8f6f9128..f89df4ca 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/providers.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/providers.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf index 7f654d51..0c1ecbe0 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "helm_release" "rabbitmq" { name = "rabbitmq" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf index e72404f6..d35ff912 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "helm_release" "redis" { name = "redis" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/variables.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/variables.tf index 0ee12e20..0499a01c 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "tenantId" { description = "Tenant ID" } diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/rg/providers.tf b/src/vibe_core/vibe_core/terraform/aks/modules/rg/providers.tf index 3b1e5d28..75ff3fbc 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/rg/providers.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/rg/providers.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/rg/random.tf b/src/vibe_core/vibe_core/terraform/aks/modules/rg/random.tf index c1b4455e..019c2a32 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/rg/random.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/rg/random.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "random_string" "name_suffix" { length = 5 special = false diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/rg/resourcegroup.tf b/src/vibe_core/vibe_core/terraform/aks/modules/rg/resourcegroup.tf index dc40a4d8..23957b70 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/rg/resourcegroup.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/rg/resourcegroup.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "azurerm_resource_group" "resourcegroup" { location = var.location name = var.resource_group_name diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/rg/variables.tf b/src/vibe_core/vibe_core/terraform/aks/modules/rg/variables.tf index e7e8ac2f..c28d9946 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/rg/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/rg/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "location" { description = "Location of the resources." } diff --git a/src/vibe_core/vibe_core/terraform/aks/variables.tf b/src/vibe_core/vibe_core/terraform/aks/variables.tf index 8669e52f..9f3222fc 100644 --- a/src/vibe_core/vibe_core/terraform/aks/variables.tf +++ b/src/vibe_core/vibe_core/terraform/aks/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "location" { description = "Azure Location of the resources." 
} diff --git a/src/vibe_core/vibe_core/terraform/local/__init__.py b/src/vibe_core/vibe_core/terraform/local/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/local/__init__.py +++ b/src/vibe_core/vibe_core/terraform/local/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/local/main.tf b/src/vibe_core/vibe_core/terraform/local/main.tf index a8eba912..ad1a247c 100644 --- a/src/vibe_core/vibe_core/terraform/local/main.tf +++ b/src/vibe_core/vibe_core/terraform/local/main.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + terraform { required_version = ">=0.12" backend "local" { diff --git a/src/vibe_core/vibe_core/terraform/local/modules/__init__.py b/src/vibe_core/vibe_core/terraform/local/modules/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/__init__.py +++ b/src/vibe_core/vibe_core/terraform/local/modules/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/dapr.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/dapr.tf index b2bec611..3073820d 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/dapr.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/dapr.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ resource "kubernetes_namespace" "kubernetesdaprnamespace" { metadata { name = "dapr-system" diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/jaeger.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/jaeger.tf index 8c4f8122..0ae49f75 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/jaeger.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/jaeger.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_deployment" "jaeger" { count = var.enable_telemetry ? 1 : 0 metadata { diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/otel.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/otel.tf index d506aca7..b4e2d2a7 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/otel.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/otel.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_config_map" "otel_collector_config" { count = var.enable_telemetry ? 1 : 0 metadata { diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/outputs.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/outputs.tf index aad39080..7e2ca6b2 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/outputs.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/outputs.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ output "ready_to_deploy" { value = true diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/persistentvolume.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/persistentvolume.tf index c74e2f8d..278115c1 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/persistentvolume.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/persistentvolume.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "kubernetes_persistent_volume" "user_storage_pv" { metadata { name = "user-storage-pv" diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/providers.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/providers.tf index a8987627..d933e5fe 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/providers.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/providers.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf index cb5690c9..35cccd5d 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + resource "helm_release" "rabbitmq" { name = "rabbitmq" diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf index 537ce6ce..da2d11e3 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ resource "helm_release" "redis" { name = "redis" diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/variables.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/variables.tf index 5da0bdef..8942f97f 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/variables.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "namespace" { description = "Namespace" } diff --git a/src/vibe_core/vibe_core/terraform/local/variables.tf b/src/vibe_core/vibe_core/terraform/local/variables.tf index 4c226a9c..bc080f19 100644 --- a/src/vibe_core/vibe_core/terraform/local/variables.tf +++ b/src/vibe_core/vibe_core/terraform/local/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "acr_registry" { description = "ACR Registry" } diff --git a/src/vibe_core/vibe_core/terraform/services/__init__.py b/src/vibe_core/vibe_core/terraform/services/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/services/__init__.py +++ b/src/vibe_core/vibe_core/terraform/services/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/services/cache.tf b/src/vibe_core/vibe_core/terraform/services/cache.tf index 75c46859..95015e4f 100644 --- a/src/vibe_core/vibe_core/terraform/services/cache.tf +++ b/src/vibe_core/vibe_core/terraform/services/cache.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ locals { cache_common_args = concat( [ diff --git a/src/vibe_core/vibe_core/terraform/services/dataops.tf b/src/vibe_core/vibe_core/terraform/services/dataops.tf index 1856228e..bbd40e80 100644 --- a/src/vibe_core/vibe_core/terraform/services/dataops.tf +++ b/src/vibe_core/vibe_core/terraform/services/dataops.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + locals { service_name = "terravibes-data-ops" data_ops_common_args = concat( diff --git a/src/vibe_core/vibe_core/terraform/services/init.tf b/src/vibe_core/vibe_core/terraform/services/init.tf index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/terraform/services/init.tf +++ b/src/vibe_core/vibe_core/terraform/services/init.tf @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/terraform/services/orchestrator.tf b/src/vibe_core/vibe_core/terraform/services/orchestrator.tf index f0b14631..d077c2d9 100644 --- a/src/vibe_core/vibe_core/terraform/services/orchestrator.tf +++ b/src/vibe_core/vibe_core/terraform/services/orchestrator.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + locals { orchestrator_common_args = concat( [ diff --git a/src/vibe_core/vibe_core/terraform/services/providers.tf b/src/vibe_core/vibe_core/terraform/services/providers.tf index f313fa96..7f4027dd 100644 --- a/src/vibe_core/vibe_core/terraform/services/providers.tf +++ b/src/vibe_core/vibe_core/terraform/services/providers.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ terraform { required_version = ">=0.12" diff --git a/src/vibe_core/vibe_core/terraform/services/restapi.tf b/src/vibe_core/vibe_core/terraform/services/restapi.tf index 829ffd9c..cf973c34 100644 --- a/src/vibe_core/vibe_core/terraform/services/restapi.tf +++ b/src/vibe_core/vibe_core/terraform/services/restapi.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + locals { restapi_common_args = concat( [ diff --git a/src/vibe_core/vibe_core/terraform/services/variables.tf b/src/vibe_core/vibe_core/terraform/services/variables.tf index 767c3333..8cf617a3 100644 --- a/src/vibe_core/vibe_core/terraform/services/variables.tf +++ b/src/vibe_core/vibe_core/terraform/services/variables.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + variable "prefix" { description = "Prefix for resources" } diff --git a/src/vibe_core/vibe_core/terraform/services/worker.tf b/src/vibe_core/vibe_core/terraform/services/worker.tf index 743770d8..d15f2e88 100644 --- a/src/vibe_core/vibe_core/terraform/services/worker.tf +++ b/src/vibe_core/vibe_core/terraform/services/worker.tf @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + locals { worker_common_args = concat( [ diff --git a/src/vibe_core/vibe_core/testing/__init__.py b/src/vibe_core/vibe_core/testing/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_core/vibe_core/testing/__init__.py +++ b/src/vibe_core/vibe_core/testing/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_core/vibe_core/testing/comparison.py b/src/vibe_core/vibe_core/testing/comparison.py index 9b279eba..e08e30e4 100644 --- a/src/vibe_core/vibe_core/testing/comparison.py +++ b/src/vibe_core/vibe_core/testing/comparison.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from dataclasses import fields from typing import List diff --git a/src/vibe_core/vibe_core/uri.py b/src/vibe_core/vibe_core/uri.py index 26161512..db835d2e 100644 --- a/src/vibe_core/vibe_core/uri.py +++ b/src/vibe_core/vibe_core/uri.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Utility functions for working with URIs.""" import os diff --git a/src/vibe_core/vibe_core/utils.py b/src/vibe_core/vibe_core/utils.py index 357cdeff..661a843a 100644 --- a/src/vibe_core/vibe_core/utils.py +++ b/src/vibe_core/vibe_core/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """General utility functions used across FarmVibes.AI codebase.""" from dataclasses import dataclass diff --git a/src/vibe_dev/setup.py b/src/vibe_dev/setup.py index dd5dbec8..debeb452 100644 --- a/src/vibe_dev/setup.py +++ b/src/vibe_dev/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_dev/vibe_dev/__init__.py b/src/vibe_dev/vibe_dev/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_dev/vibe_dev/__init__.py +++ b/src/vibe_dev/vibe_dev/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_dev/vibe_dev/client/__init__.py b/src/vibe_dev/vibe_dev/client/__init__.py index 11a0e1a7..cf22b87d 100644 --- a/src/vibe_dev/vibe_dev/client/__init__.py +++ b/src/vibe_dev/vibe_dev/client/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from vibe_core.client import get_default_vibe_client from .remote_client import get_ppe_vibe_client diff --git a/src/vibe_dev/vibe_dev/client/remote_client.py b/src/vibe_dev/vibe_dev/client/remote_client.py index 0b796562..4b0c2caa 100644 --- a/src/vibe_dev/vibe_dev/client/remote_client.py +++ b/src/vibe_dev/vibe_dev/client/remote_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from vibe_core.client import FarmvibesAiClient PPE_URL = "https://ppe-terravibes-api.57fb76945e6d4b66a912.eastus.aksapp.io/" diff --git a/src/vibe_dev/vibe_dev/client/subprocess_client.py b/src/vibe_dev/vibe_dev/client/subprocess_client.py index dc9e671b..ae4fbc0e 100644 --- a/src/vibe_dev/vibe_dev/client/subprocess_client.py +++ b/src/vibe_dev/vibe_dev/client/subprocess_client.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from datetime import datetime from typing import Any, List, Tuple, cast diff --git a/src/vibe_dev/vibe_dev/local_runner.py b/src/vibe_dev/vibe_dev/local_runner.py index 5951219d..067cf81a 100644 --- a/src/vibe_dev/vibe_dev/local_runner.py +++ b/src/vibe_dev/vibe_dev/local_runner.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import cast from uuid import UUID diff --git a/src/vibe_dev/vibe_dev/mock_utils.py b/src/vibe_dev/vibe_dev/mock_utils.py index 436dc6ca..8dc511e4 100644 --- a/src/vibe_dev/vibe_dev/mock_utils.py +++ b/src/vibe_dev/vibe_dev/mock_utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict from pydantic import BaseModel diff --git a/src/vibe_dev/vibe_dev/testing/__init__.py b/src/vibe_dev/vibe_dev/testing/__init__.py index af7e4799..4863cccf 100644 --- a/src/vibe_dev/vibe_dev/testing/__init__.py +++ b/src/vibe_dev/vibe_dev/testing/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + import pytest diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py index e8eae5d4..84787cc3 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/base_op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from dataclasses import asdict from typing import Any, List, Union diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py index cb20bb81..0e59efc6 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Any diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py index 1825a6be..1cd6c821 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_item_op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, List from vibe_core.data import DataVibe diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py index f9a30042..7460d81f 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/to_list_op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from typing import Any from vibe_core.data import DataVibe diff --git a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py index 326ccefc..39159f9a 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py +++ b/src/vibe_dev/vibe_dev/testing/fake_ops/fake/vibe_op.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, List, Union from vibe_core.data import DataVibe diff --git a/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py b/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py index 3fa3ddcc..9e7d63e9 100644 --- a/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py +++ b/src/vibe_dev/vibe_dev/testing/fake_workflows_fixtures.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from dataclasses import dataclass diff --git a/src/vibe_dev/vibe_dev/testing/op_tester.py b/src/vibe_dev/vibe_dev/testing/op_tester.py index 7a2f0736..7c247985 100644 --- a/src/vibe_dev/vibe_dev/testing/op_tester.py +++ b/src/vibe_dev/vibe_dev/testing/op_tester.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import logging import os diff --git a/src/vibe_dev/vibe_dev/testing/storage_fixtures.py b/src/vibe_dev/vibe_dev/testing/storage_fixtures.py index 61f9012a..2b29d2d2 100644 --- a/src/vibe_dev/vibe_dev/testing/storage_fixtures.py +++ b/src/vibe_dev/vibe_dev/testing/storage_fixtures.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import uuid from typing import cast diff --git a/src/vibe_dev/vibe_dev/testing/utils.py b/src/vibe_dev/vibe_dev/testing/utils.py index 7409a474..17634ce7 100644 --- a/src/vibe_dev/vibe_dev/testing/utils.py +++ b/src/vibe_dev/vibe_dev/testing/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from typing import List from unittest import TestCase diff --git a/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py b/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py index aa0bf2a9..1e1e1a98 100644 --- a/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py +++ b/src/vibe_dev/vibe_dev/testing/workflow_fixtures.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import uuid from dataclasses import dataclass from datetime import datetime diff --git a/src/vibe_lib/setup.py b/src/vibe_lib/setup.py index 154d8ad3..42c7f0bc 100644 --- a/src/vibe_lib/setup.py +++ b/src/vibe_lib/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_lib/tests/test_airbus_api.py b/src/vibe_lib/tests/test_airbus_api.py index 50cc531c..cdcab08a 100644 --- a/src/vibe_lib/tests/test_airbus_api.py +++ b/src/vibe_lib/tests/test_airbus_api.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict from unittest.mock import Mock, patch diff --git a/src/vibe_lib/tests/test_earthdata.py b/src/vibe_lib/tests/test_earthdata.py index c3aa3858..39a3cc99 100644 --- a/src/vibe_lib/tests/test_earthdata.py +++ b/src/vibe_lib/tests/test_earthdata.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import math from datetime import datetime from typing import Any, Optional, Tuple, cast diff --git a/src/vibe_lib/tests/test_predict_chips.py b/src/vibe_lib/tests/test_predict_chips.py index 407e4b32..6d3db524 100644 --- a/src/vibe_lib/tests/test_predict_chips.py +++ b/src/vibe_lib/tests/test_predict_chips.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from datetime import datetime from pathlib import Path from unittest.mock import MagicMock, patch diff --git a/src/vibe_lib/tests/test_raster_chipping.py b/src/vibe_lib/tests/test_raster_chipping.py index 420e2e7f..ba8444fe 100644 --- a/src/vibe_lib/tests/test_raster_chipping.py +++ b/src/vibe_lib/tests/test_raster_chipping.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from tempfile import TemporaryDirectory diff --git a/src/vibe_lib/vibe_lib/__init__.py b/src/vibe_lib/vibe_lib/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_lib/vibe_lib/__init__.py +++ b/src/vibe_lib/vibe_lib/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_lib/vibe_lib/airbus.py b/src/vibe_lib/vibe_lib/airbus.py index e7002c3a..e099e385 100644 --- a/src/vibe_lib/vibe_lib/airbus.py +++ b/src/vibe_lib/vibe_lib/airbus.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json import os import time diff --git a/src/vibe_lib/vibe_lib/archive.py b/src/vibe_lib/vibe_lib/archive.py index d984f82c..bbf71844 100644 --- a/src/vibe_lib/vibe_lib/archive.py +++ b/src/vibe_lib/vibe_lib/archive.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import shutil diff --git a/src/vibe_lib/vibe_lib/bing_maps.py b/src/vibe_lib/vibe_lib/bing_maps.py index d23863f3..18ea2a3e 100644 --- a/src/vibe_lib/vibe_lib/bing_maps.py +++ b/src/vibe_lib/vibe_lib/bing_maps.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ BingMaps API interface and auxiliary method to query tiles, download basemaps, and manipulate between lat-lon coordinates and tile x-y coordinates. 
Part of the code diff --git a/src/vibe_lib/vibe_lib/climatology_lab.py b/src/vibe_lib/vibe_lib/climatology_lab.py index 9e8d2f17..a3d64029 100644 --- a/src/vibe_lib/vibe_lib/climatology_lab.py +++ b/src/vibe_lib/vibe_lib/climatology_lab.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import hashlib from datetime import datetime from typing import List, Tuple diff --git a/src/vibe_lib/vibe_lib/comet_farm/__init__.py b/src/vibe_lib/vibe_lib/comet_farm/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_lib/vibe_lib/comet_farm/__init__.py +++ b/src/vibe_lib/vibe_lib/comet_farm/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_model.py b/src/vibe_lib/vibe_lib/comet_farm/comet_model.py index 2a51400e..b9c9968d 100644 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_model.py +++ b/src/vibe_lib/vibe_lib/comet_farm/comet_model.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, List, Union from pydantic import BaseModel, Field diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py b/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py index 8947fb18..7794814d 100644 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py +++ b/src/vibe_lib/vibe_lib/comet_farm/comet_requester.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import json from queue import Queue from typing import Any, Dict, Optional diff --git a/src/vibe_lib/vibe_lib/comet_farm/comet_server.py b/src/vibe_lib/vibe_lib/comet_farm/comet_server.py index 6fd4dab3..05ed1aec 100644 --- a/src/vibe_lib/vibe_lib/comet_farm/comet_server.py +++ b/src/vibe_lib/vibe_lib/comet_farm/comet_server.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import io import logging import os diff --git a/src/vibe_lib/vibe_lib/deepmc/encoder.py b/src/vibe_lib/vibe_lib/deepmc/encoder.py index c0dacfcd..6af9f5c5 100644 --- a/src/vibe_lib/vibe_lib/deepmc/encoder.py +++ b/src/vibe_lib/vibe_lib/deepmc/encoder.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Optional from torch import Tensor, nn diff --git a/src/vibe_lib/vibe_lib/deepmc/helpers.py b/src/vibe_lib/vibe_lib/deepmc/helpers.py index cec74153..9c4147b5 100644 --- a/src/vibe_lib/vibe_lib/deepmc/helpers.py +++ b/src/vibe_lib/vibe_lib/deepmc/helpers.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Optional import numpy as np diff --git a/src/vibe_lib/vibe_lib/deepmc/locally_connected.py b/src/vibe_lib/vibe_lib/deepmc/locally_connected.py index b9dc8e80..c7ea5e0b 100644 --- a/src/vibe_lib/vibe_lib/deepmc/locally_connected.py +++ b/src/vibe_lib/vibe_lib/deepmc/locally_connected.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Optional, Tuple, Union import torch diff --git a/src/vibe_lib/vibe_lib/deepmc/models.py b/src/vibe_lib/vibe_lib/deepmc/models.py index 99fab6d8..13e71de4 100644 --- a/src/vibe_lib/vibe_lib/deepmc/models.py +++ b/src/vibe_lib/vibe_lib/deepmc/models.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, List, Tuple, Union import torch diff --git a/src/vibe_lib/vibe_lib/deepmc/time.py b/src/vibe_lib/vibe_lib/deepmc/time.py index 3183c1f0..8c529265 100644 --- a/src/vibe_lib/vibe_lib/deepmc/time.py +++ b/src/vibe_lib/vibe_lib/deepmc/time.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from torch import Tensor, nn diff --git a/src/vibe_lib/vibe_lib/deepmc/transform.py b/src/vibe_lib/vibe_lib/deepmc/transform.py index 4552eea0..cc6f05d0 100644 --- a/src/vibe_lib/vibe_lib/deepmc/transform.py +++ b/src/vibe_lib/vibe_lib/deepmc/transform.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import torch.nn as nn from einops import rearrange from torch import Tensor diff --git a/src/vibe_lib/vibe_lib/earthdata.py b/src/vibe_lib/vibe_lib/earthdata.py index 1b79ca4f..362b35f8 100644 --- a/src/vibe_lib/vibe_lib/earthdata.py +++ b/src/vibe_lib/vibe_lib/earthdata.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Interact with NASA's EarthData platform's API """ diff --git a/src/vibe_lib/vibe_lib/gaussian_mixture.py b/src/vibe_lib/vibe_lib/gaussian_mixture.py index 17479b96..e9b81a96 100644 --- a/src/vibe_lib/vibe_lib/gaussian_mixture.py +++ b/src/vibe_lib/vibe_lib/gaussian_mixture.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Tuple import numpy as np diff --git a/src/vibe_lib/vibe_lib/geometry.py b/src/vibe_lib/vibe_lib/geometry.py index aee11271..a2efd7a6 100644 --- a/src/vibe_lib/vibe_lib/geometry.py +++ b/src/vibe_lib/vibe_lib/geometry.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from enum import auto from functools import reduce from operator import add diff --git a/src/vibe_lib/vibe_lib/gfs_blob_utils.py b/src/vibe_lib/vibe_lib/gfs_blob_utils.py index f2a4545e..04d56e2b 100644 --- a/src/vibe_lib/vibe_lib/gfs_blob_utils.py +++ b/src/vibe_lib/vibe_lib/gfs_blob_utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from datetime import datetime """ diff --git a/src/vibe_lib/vibe_lib/glad.py b/src/vibe_lib/vibe_lib/glad.py index 6dcbf703..735feef7 100644 --- a/src/vibe_lib/vibe_lib/glad.py +++ b/src/vibe_lib/vibe_lib/glad.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, Iterable, List import geopandas as gpd diff --git a/src/vibe_lib/vibe_lib/heatmap_neighbor.py b/src/vibe_lib/vibe_lib/heatmap_neighbor.py index 28f0a48f..84b0d6b0 100644 --- a/src/vibe_lib/vibe_lib/heatmap_neighbor.py +++ b/src/vibe_lib/vibe_lib/heatmap_neighbor.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import cast import geopandas as gpd diff --git a/src/vibe_lib/vibe_lib/overlap_clustering.py b/src/vibe_lib/vibe_lib/overlap_clustering.py index 18f4f055..283898fe 100644 --- a/src/vibe_lib/vibe_lib/overlap_clustering.py +++ b/src/vibe_lib/vibe_lib/overlap_clustering.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import math from typing import Any diff --git a/src/vibe_lib/vibe_lib/planetary_computer.py b/src/vibe_lib/vibe_lib/planetary_computer.py index 28e425b1..8ad62691 100644 --- a/src/vibe_lib/vibe_lib/planetary_computer.py +++ b/src/vibe_lib/vibe_lib/planetary_computer.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Planetary computer model for TerraVibes. Helps query and download items and assets. """ diff --git a/src/vibe_lib/vibe_lib/raster.py b/src/vibe_lib/vibe_lib/raster.py index dfd3a3c2..80ec6f51 100644 --- a/src/vibe_lib/vibe_lib/raster.py +++ b/src/vibe_lib/vibe_lib/raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import json import logging import mimetypes diff --git a/src/vibe_lib/vibe_lib/segment_anything.py b/src/vibe_lib/vibe_lib/segment_anything.py index 0d4267c3..2f405cd5 100644 --- a/src/vibe_lib/vibe_lib/segment_anything.py +++ b/src/vibe_lib/vibe_lib/segment_anything.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from itertools import product from math import ceil diff --git a/src/vibe_lib/vibe_lib/shapefile.py b/src/vibe_lib/vibe_lib/shapefile.py index 57a4ae47..6a50a954 100644 --- a/src/vibe_lib/vibe_lib/shapefile.py +++ b/src/vibe_lib/vibe_lib/shapefile.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from typing import Any diff --git a/src/vibe_lib/vibe_lib/spaceeye/__init__.py b/src/vibe_lib/vibe_lib/spaceeye/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/__init__.py +++ b/src/vibe_lib/vibe_lib/spaceeye/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_lib/vibe_lib/spaceeye/chip.py b/src/vibe_lib/vibe_lib/spaceeye/chip.py index 89ff19c2..1297bdb8 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/chip.py +++ b/src/vibe_lib/vibe_lib/spaceeye/chip.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ This module contains code for running a pytorch module in chips extracted from rasters. Chips are read from disk before inference and predictions are written diff --git a/src/vibe_lib/vibe_lib/spaceeye/dataset.py b/src/vibe_lib/vibe_lib/spaceeye/dataset.py index 1b38bc17..3284c461 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/dataset.py +++ b/src/vibe_lib/vibe_lib/spaceeye/dataset.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Module for generating inputs for the SpaceEye model. 
diff --git a/src/vibe_lib/vibe_lib/spaceeye/illumination.py b/src/vibe_lib/vibe_lib/spaceeye/illumination.py index 3fb49107..a2b17d7c 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/illumination.py +++ b/src/vibe_lib/vibe_lib/spaceeye/illumination.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """ Methods for computing, normalizing and interpolation illuminance of multispectral raster timeseries. diff --git a/src/vibe_lib/vibe_lib/spaceeye/interpolation.py b/src/vibe_lib/vibe_lib/spaceeye/interpolation.py index 5548c63c..cb9fc195 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/interpolation.py +++ b/src/vibe_lib/vibe_lib/spaceeye/interpolation.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict import torch diff --git a/src/vibe_lib/vibe_lib/spaceeye/utils.py b/src/vibe_lib/vibe_lib/spaceeye/utils.py index 27552bb2..a87fd0c9 100644 --- a/src/vibe_lib/vibe_lib/spaceeye/utils.py +++ b/src/vibe_lib/vibe_lib/spaceeye/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Dict, List, Sequence, TypeVar from vibe_core.data import S2ProcessingLevel, Sentinel2Product diff --git a/src/vibe_lib/vibe_lib/stats.py b/src/vibe_lib/vibe_lib/stats.py index 45866bcd..c3a276ef 100644 --- a/src/vibe_lib/vibe_lib/stats.py +++ b/src/vibe_lib/vibe_lib/stats.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime from typing import Any, Dict, List, NamedTuple, Sequence, cast diff --git a/src/vibe_lib/vibe_lib/timeseries.py b/src/vibe_lib/vibe_lib/timeseries.py index f9e4815e..4aa364e2 100644 --- a/src/vibe_lib/vibe_lib/timeseries.py +++ b/src/vibe_lib/vibe_lib/timeseries.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import mimetypes import os diff --git a/src/vibe_notebook/setup.py b/src/vibe_notebook/setup.py index baf85dd2..2b723afe 100644 --- a/src/vibe_notebook/setup.py +++ b/src/vibe_notebook/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_notebook/vibe_notebook/__init__.py b/src/vibe_notebook/vibe_notebook/__init__.py index bcac4a86..b4ba6ddd 100644 --- a/src/vibe_notebook/vibe_notebook/__init__.py +++ b/src/vibe_notebook/vibe_notebook/__init__.py @@ -1 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Shared notebook library for FarmVibes.AI notebooks.""" diff --git a/src/vibe_notebook/vibe_notebook/deepmc/__init__.py b/src/vibe_notebook/vibe_notebook/deepmc/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_notebook/vibe_notebook/deepmc/__init__.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_notebook/vibe_notebook/deepmc/forecast.py b/src/vibe_notebook/vibe_notebook/deepmc/forecast.py index 993a6617..a8742496 100644 --- a/src/vibe_notebook/vibe_notebook/deepmc/forecast.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/forecast.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime, timedelta from typing import Any, Dict, List, Tuple, cast diff --git a/src/vibe_notebook/vibe_notebook/deepmc/prediction.py b/src/vibe_notebook/vibe_notebook/deepmc/prediction.py index eb72cd4a..9180ae10 100644 --- a/src/vibe_notebook/vibe_notebook/deepmc/prediction.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/prediction.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os import pickle from datetime import datetime, timedelta diff --git a/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py b/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py index a6a28113..67c4e043 100644 --- a/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/preprocess.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import timedelta from math import ceil from typing import Any, List, Optional, Tuple diff --git a/src/vibe_notebook/vibe_notebook/deepmc/utils.py b/src/vibe_notebook/vibe_notebook/deepmc/utils.py index ffc2a2e2..57d51068 100644 --- a/src/vibe_notebook/vibe_notebook/deepmc/utils.py +++ b/src/vibe_notebook/vibe_notebook/deepmc/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from datetime import datetime, timedelta from typing import Any, Dict, List diff --git a/src/vibe_notebook/vibe_notebook/plot.py b/src/vibe_notebook/vibe_notebook/plot.py index cfe93cba..2dcf72c1 100644 --- a/src/vibe_notebook/vibe_notebook/plot.py +++ b/src/vibe_notebook/vibe_notebook/plot.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Auxiliary methods for plotting and visualizing data in notebooks.""" import io diff --git a/src/vibe_notebook/vibe_notebook/raster.py b/src/vibe_notebook/vibe_notebook/raster.py index 7eee0f71..1e36d4f5 100644 --- a/src/vibe_notebook/vibe_notebook/raster.py +++ b/src/vibe_notebook/vibe_notebook/raster.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ """Raster data processing utilities.""" from typing import Any, List, Optional diff --git a/src/vibe_notebook/vibe_notebook/utils.py b/src/vibe_notebook/vibe_notebook/utils.py index 971d8751..c2c8b815 100644 --- a/src/vibe_notebook/vibe_notebook/utils.py +++ b/src/vibe_notebook/vibe_notebook/utils.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Additional utility methods used in the notebooks.""" import os diff --git a/src/vibe_server/setup.py b/src/vibe_server/setup.py index 7307eb52..184ac0fd 100644 --- a/src/vibe_server/setup.py +++ b/src/vibe_server/setup.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from setuptools import find_packages, setup setup( diff --git a/src/vibe_server/tests/conftest.py b/src/vibe_server/tests/conftest.py index 48ec6ac8..3c5bf8af 100644 --- a/src/vibe_server/tests/conftest.py +++ b/src/vibe_server/tests/conftest.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from dataclasses import asdict from typing import Any, Dict diff --git a/src/vibe_server/tests/test_graph.py b/src/vibe_server/tests/test_graph.py index 8a4d08f9..34ce87ba 100644 --- a/src/vibe_server/tests/test_graph.py +++ b/src/vibe_server/tests/test_graph.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import random from typing import Dict, List diff --git a/src/vibe_server/tests/test_href_handler.py b/src/vibe_server/tests/test_href_handler.py index a3bee77c..abc8ef39 100644 --- a/src/vibe_server/tests/test_href_handler.py +++ b/src/vibe_server/tests/test_href_handler.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import datetime import os from pathlib import Path diff --git a/src/vibe_server/tests/test_op_parallelism.py b/src/vibe_server/tests/test_op_parallelism.py index f7e4cf6f..75259b4d 100644 --- a/src/vibe_server/tests/test_op_parallelism.py +++ b/src/vibe_server/tests/test_op_parallelism.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Awaitable, Callable, Dict, List, NamedTuple, cast from unittest.mock import MagicMock, patch from uuid import UUID, uuid4 diff --git a/src/vibe_server/tests/test_orchestrator.py b/src/vibe_server/tests/test_orchestrator.py index 936bd61c..52c47381 100644 --- a/src/vibe_server/tests/test_orchestrator.py +++ b/src/vibe_server/tests/test_orchestrator.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from asyncio.queues import Queue from dataclasses import asdict from datetime import datetime diff --git a/src/vibe_server/tests/test_parameter_resolver.py b/src/vibe_server/tests/test_parameter_resolver.py index 7889d9b8..4b9a6a69 100644 --- a/src/vibe_server/tests/test_parameter_resolver.py +++ b/src/vibe_server/tests/test_parameter_resolver.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from vibe_common.schemas import OperationParser diff --git a/src/vibe_server/tests/test_remote_workflow_runner.py b/src/vibe_server/tests/test_remote_workflow_runner.py index 37f8d781..9d4d8f6b 100644 --- a/src/vibe_server/tests/test_remote_workflow_runner.py +++ b/src/vibe_server/tests/test_remote_workflow_runner.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import os import sys import traceback diff --git a/src/vibe_server/tests/test_workflow.py b/src/vibe_server/tests/test_workflow.py index 544c0157..ce3abf06 100644 --- a/src/vibe_server/tests/test_workflow.py +++ b/src/vibe_server/tests/test_workflow.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os from typing import List diff --git a/src/vibe_server/tests/test_workflow_input_handler.py b/src/vibe_server/tests/test_workflow_input_handler.py index c7f0e071..4c69db14 100644 --- a/src/vibe_server/tests/test_workflow_input_handler.py +++ b/src/vibe_server/tests/test_workflow_input_handler.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import copy from dataclasses import dataclass from datetime import datetime diff --git a/src/vibe_server/tests/test_workflow_parser.py b/src/vibe_server/tests/test_workflow_parser.py index b21e8f46..4c39149e 100644 --- a/src/vibe_server/tests/test_workflow_parser.py +++ b/src/vibe_server/tests/test_workflow_parser.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from dataclasses import asdict import pytest diff --git a/src/vibe_server/tests/test_workflow_runner.py b/src/vibe_server/tests/test_workflow_runner.py index 6576cbc1..066c16e6 100644 --- a/src/vibe_server/tests/test_workflow_runner.py +++ b/src/vibe_server/tests/test_workflow_runner.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, List from uuid import UUID, uuid4 diff --git a/src/vibe_server/tests/test_workflow_spec_validator.py b/src/vibe_server/tests/test_workflow_spec_validator.py index be54137e..159df489 100644 --- a/src/vibe_server/tests/test_workflow_spec_validator.py +++ b/src/vibe_server/tests/test_workflow_spec_validator.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import pytest from vibe_dev.testing.fake_workflows_fixtures import get_fake_workflow_path diff --git a/src/vibe_server/tests/test_workflow_state.py b/src/vibe_server/tests/test_workflow_state.py index 0f1816c5..393067d4 100644 --- a/src/vibe_server/tests/test_workflow_state.py +++ b/src/vibe_server/tests/test_workflow_state.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from collections import Counter from dataclasses import asdict from datetime import datetime diff --git a/src/vibe_server/vibe_server/__init__.py b/src/vibe_server/vibe_server/__init__.py index e69de29b..b7c52582 100644 --- a/src/vibe_server/vibe_server/__init__.py +++ b/src/vibe_server/vibe_server/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + diff --git a/src/vibe_server/vibe_server/href_handler.py b/src/vibe_server/vibe_server/href_handler.py index 5968dad0..bdc99c88 100644 --- a/src/vibe_server/vibe_server/href_handler.py +++ b/src/vibe_server/vibe_server/href_handler.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from abc import ABC, abstractmethod from pathlib import Path from typing import List, Optional, Union diff --git a/src/vibe_server/vibe_server/orchestrator.py b/src/vibe_server/vibe_server/orchestrator.py index c04fac33..8480a0b7 100755 --- a/src/vibe_server/vibe_server/orchestrator.py +++ b/src/vibe_server/vibe_server/orchestrator.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import asyncio.queues import logging diff --git a/src/vibe_server/vibe_server/server.py b/src/vibe_server/vibe_server/server.py index dfae3d1b..75dbe2a5 100644 --- a/src/vibe_server/vibe_server/server.py +++ b/src/vibe_server/vibe_server/server.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import asyncio import logging import os diff --git a/src/vibe_server/vibe_server/sniffer.py b/src/vibe_server/vibe_server/sniffer.py index 3c78e97d..75d6d664 100644 --- a/src/vibe_server/vibe_server/sniffer.py +++ b/src/vibe_server/vibe_server/sniffer.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import argparse import logging from typing import List diff --git a/src/vibe_server/vibe_server/workflow/__init__.py b/src/vibe_server/vibe_server/workflow/__init__.py index 9d4053ad..b2f600bf 100644 --- a/src/vibe_server/vibe_server/workflow/__init__.py +++ b/src/vibe_server/vibe_server/workflow/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import re from typing import Any, Dict, List, Union diff --git a/src/vibe_server/vibe_server/workflow/description_validator.py b/src/vibe_server/vibe_server/workflow/description_validator.py index 1d026655..bcff4b70 100644 --- a/src/vibe_server/vibe_server/workflow/description_validator.py +++ b/src/vibe_server/vibe_server/workflow/description_validator.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from typing import Any, Dict, List, Tuple, Union from .parameter import ParameterResolver diff --git a/src/vibe_server/vibe_server/workflow/graph.py b/src/vibe_server/vibe_server/workflow/graph.py index 6b6f107b..1edd98da 100644 --- a/src/vibe_server/vibe_server/workflow/graph.py +++ b/src/vibe_server/vibe_server/workflow/graph.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ from collections import defaultdict from enum import IntEnum diff --git a/src/vibe_server/vibe_server/workflow/input_handler.py b/src/vibe_server/vibe_server/workflow/input_handler.py index 0b52ca6b..6f5e2d3b 100644 --- a/src/vibe_server/vibe_server/workflow/input_handler.py +++ b/src/vibe_server/vibe_server/workflow/input_handler.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging from typing import ( Any, diff --git a/src/vibe_server/vibe_server/workflow/parameter.py b/src/vibe_server/vibe_server/workflow/parameter.py index 4ec98eef..7a0478d9 100644 --- a/src/vibe_server/vibe_server/workflow/parameter.py +++ b/src/vibe_server/vibe_server/workflow/parameter.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from collections import defaultdict from typing import Any, Dict, List, Optional, Tuple, Union, cast diff --git a/src/vibe_server/vibe_server/workflow/runner/__init__.py b/src/vibe_server/vibe_server/workflow/runner/__init__.py index 4bb598e2..44601725 100644 --- a/src/vibe_server/vibe_server/workflow/runner/__init__.py +++ b/src/vibe_server/vibe_server/workflow/runner/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from .remote_runner import RemoteWorkflowRunner from .runner import NoOpStateChange, WorkflowCallback, WorkflowChange, WorkflowRunner diff --git a/src/vibe_server/vibe_server/workflow/runner/remote_runner.py b/src/vibe_server/vibe_server/workflow/runner/remote_runner.py index 3240a673..51ee3efb 100644 --- a/src/vibe_server/vibe_server/workflow/runner/remote_runner.py +++ b/src/vibe_server/vibe_server/workflow/runner/remote_runner.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import asyncio import asyncio.queues import logging diff --git a/src/vibe_server/vibe_server/workflow/runner/runner.py b/src/vibe_server/vibe_server/workflow/runner/runner.py index 3d9dc949..9443cf2a 100644 --- a/src/vibe_server/vibe_server/workflow/runner/runner.py +++ b/src/vibe_server/vibe_server/workflow/runner/runner.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import asyncio import gc import logging diff --git a/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py b/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py index 3a82522c..de1a8a2a 100644 --- a/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py +++ b/src/vibe_server/vibe_server/workflow/runner/task_io_handler.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + from copy import copy from typing import Dict, List diff --git a/src/vibe_server/vibe_server/workflow/spec_parser.py b/src/vibe_server/vibe_server/workflow/spec_parser.py index 76015dc6..4cc634ac 100644 --- a/src/vibe_server/vibe_server/workflow/spec_parser.py +++ b/src/vibe_server/vibe_server/workflow/spec_parser.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import os import re from copy import deepcopy diff --git a/src/vibe_server/vibe_server/workflow/spec_validator.py b/src/vibe_server/vibe_server/workflow/spec_validator.py index 3c0f5a95..d8a812f6 100644 --- a/src/vibe_server/vibe_server/workflow/spec_validator.py +++ b/src/vibe_server/vibe_server/workflow/spec_validator.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ import warnings from typing import List diff --git a/src/vibe_server/vibe_server/workflow/workflow.py b/src/vibe_server/vibe_server/workflow/workflow.py index ba704aac..a4398b89 100644 --- a/src/vibe_server/vibe_server/workflow/workflow.py +++ b/src/vibe_server/vibe_server/workflow/workflow.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + import logging import os import re From ab1f8b205fb309c38387adb68a5de456630d3eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20de=20Moura=20Estev=C3=A3o=20Filho?= Date: Fri, 2 Aug 2024 17:34:35 -0300 Subject: [PATCH 11/13] Add workflow to build base images (#189) Add workflow to build and push base images, and then update tags on relevant files. --- .github/workflows/base-build.yml | 64 +++++++++++++++++++++++++++ .github/workflows/release-to-main.yml | 1 - 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/base-build.yml diff --git a/.github/workflows/base-build.yml b/.github/workflows/base-build.yml new file mode 100644 index 00000000..87b4178e --- /dev/null +++ b/.github/workflows/base-build.yml @@ -0,0 +1,64 @@ +name: Build base images +on: + workflow_dispatch: + +permissions: + id-token: write + contents: write + +jobs: + + build-and-push: + runs-on: ubuntu-latest + environment: build + strategy: + matrix: + include: + - dockerfile: Dockerfile-services-base + image-name: services-base + - dockerfile: Dockerfile-worker-base + image-name: worker-base + steps: + - uses: actions/checkout@v4 + with: + ref: dev + - name: 'Az CLI login' + uses: azure/login@v1 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + - name: 'Build and push image' + run: | + az acr login -n ${{ secrets.ACR_NAME }} + VERSION_TAG=${{ github.run_id }} + export IMAGE_TAG=${{ secrets.ACR_NAME }}.azurecr.io/unlisted/farmai/terravibes/${{ matrix.image-name }}:$VERSION_TAG + 
docker build . -f ./resources/docker/${{ matrix.dockerfile }} -t $IMAGE_TAG + docker push $IMAGE_TAG + update-tags: + needs: build-and-push + runs-on: ubuntu-latest + environment: build + steps: + - uses: actions/checkout@v4 + with: + ref: dev + ssh-key: ${{ secrets.WORKFLOW_KEY }} + - name: 'Update tags' + run: | + TAG=${{ github.run_id }} + sed -i "s|\(\s\+image.*:\).*|\1${TAG}|" ./.github/workflows/lint-test.yml + sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-api_orchestrator + sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-cache + sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-worker + sed -i "s|\(FROM.*:\).*|\1${TAG}|" ./resources/docker/Dockerfile-devcontainer + - name: Commit changes + run: | + git config --global user.email "farmvibesaicd@microsoft.com" + git config --global user.name "FarmVibes.AI Release Pipeline" + BRANCH=update-base-${{ github.run_id }} + git checkout -b $BRANCH + git add ./.github/workflows/lint-test.yml + git add ./resources/docker/ + git commit -m "Update base tag to latest image" + git push --set-upstream origin $BRANCH diff --git a/.github/workflows/release-to-main.yml b/.github/workflows/release-to-main.yml index 3636d74d..c3e27eda 100644 --- a/.github/workflows/release-to-main.yml +++ b/.github/workflows/release-to-main.yml @@ -5,7 +5,6 @@ on: permissions: id-token: write contents: write - pull-requests: write jobs: release: From 98f3f71ecd4d113af6610c8ca908f666da8583f3 Mon Sep 17 00:00:00 2001 From: Rafael Soares Padilha Date: Mon, 25 Nov 2024 10:23:18 -0300 Subject: [PATCH 12/13] Restoring files with Git LFS (#200) If the user does not have [Git LFS](https://git-lfs.com/) installed when cloning the repo, some of the large files in the repository will not be available. This causes some workflows to break, due to reading one of these missing files. 
This PR adds Git LFS as one of the requirements for setting up the repo, and updates the Makefile to pull files when building the cluster with local files (`make local` and `make local-worker`). --------- Co-authored-by: Renato L. de F. Cunha --- Makefile | 7 +++++-- docs/source/docfiles/markdown/QUICKSTART.md | 17 +++++++++++++++++ .../source/docfiles/markdown/TROUBLESHOOTING.md | 9 +++++++++ resources/vm/setup_farmvibes_ai_vm.sh | 5 +++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3ac73b4b..71e3ae9a 100644 --- a/Makefile +++ b/Makefile @@ -43,12 +43,15 @@ help: ## Shows this help message @echo -e This is the farmvibes.ai makefile. Supported targets are:\\n @grep -E -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' -local: cluster local-rest-api local-cache local-worker local-orchestrator local-data-ops ## Builds all images locally and deploys them into the local farmvibes.ai cluster +local: cluster restore-git-lfs local-rest-api local-cache local-worker local-orchestrator local-data-ops ## Builds all images locally and deploys them into the local farmvibes.ai cluster [ -z $(WAIT_AT_THE_END) ] || kubectl delete pods -l backend=terravibes && \ kubectl wait --for=condition=Available deployment --timeout=300s -l backend=terravibes revert: cluster revert-rest-api revert-cache revert-worker revert-orchestrator ## Reverts all images to the official version +restore-git-lfs: +git lfs pull || { echo "git lfs was not found. Please see https://git-lfs.com/ to install it."; exit 1; }
+ services-base: resources/docker/Dockerfile-services-base @docker manifest inspect `$(subst FILE,$<,$(base_image_name))` || \ az acr login -n `$(subst FILE,$<,$(base_image_name)) | cut -d / -f 1 | sed 's|.azurecr.io||g'` || \ @@ -122,7 +125,7 @@ revert-data-ops: cluster repo-$(DATA_OPS_REPO) delete-$(DATA_OPS_DEPLOYMENT) ## DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(CONTAINER_REGISTRY_BASE)/$(DATA_OPS_REPO):$(FARMVIBES_AI_IMAGE_TAG) $(MAKE) set-registry-image DEPLOYMENT=$(DATA_OPS_DEPLOYMENT) REPLICAS=$(CURRENT_DATA_OPS_REPLICAS) make scale -local-worker: cluster local-worker-repo delete-$(WORKER_DEPLOYMENT) ## Builds and deploys a local WORKER image (enabling debug) +local-worker: cluster restore-git-lfs local-worker-repo delete-$(WORKER_DEPLOYMENT) ## Builds and deploys a local WORKER image (enabling debug) DEPLOYMENT=$(WORKER_DEPLOYMENT) IMAGE_FULL_REFERENCE=$(WORKER_REPO):$(TAG) $(MAKE) -C . set-registry-image DEPLOYMENT=$(WORKER_DEPLOYMENT) $(MAKE) -C . disable-frozen-modules DEPLOYMENT=$(WORKER_DEPLOYMENT) REPLICAS=$(CURRENT_WORKER_REPLICAS) make scale diff --git a/docs/source/docfiles/markdown/QUICKSTART.md b/docs/source/docfiles/markdown/QUICKSTART.md index 6963a861..878f3b27 100644 --- a/docs/source/docfiles/markdown/QUICKSTART.md +++ b/docs/source/docfiles/markdown/QUICKSTART.md @@ -22,6 +22,9 @@ In order to run FarmVibes.AI cluster, you need the following: the repository. If you already have access to the source code, then Git is not required. + * [Git LFS](https://git-lfs.com/) to restore some of the large files in the + repository (e.g., model weights). + * [Docker](https://docs.docker.com/engine/install/ubuntu/). Make sure you can run the docker client without running `sudo` by adding your user account to the `docker` group (which might require a logout/login when adding oneself @@ -60,6 +63,20 @@ bash ./resources/vm/setup_farmvibes_ai_vm.sh You might needed to restart your shell session once the script finishes.
+## Restore files with Git LFS + +In case you did not have Git LFS installed when cloning the repository, you will need to install it +to restore the large files in the repository. Note that the last step +["Installing software dependencies"](#optional-installing-software-dependencies) already installs +Git LFS. + +To restore the missing files, you can run the following commands in the root of the repository: + +```shell +git lfs install +git lfs pull +``` + ## Install the FarmVibes.AI cluster With python3.8+ and pip installed on your machine, please install diff --git a/docs/source/docfiles/markdown/TROUBLESHOOTING.md b/docs/source/docfiles/markdown/TROUBLESHOOTING.md index d3e54963..ca53bfde 100644 --- a/docs/source/docfiles/markdown/TROUBLESHOOTING.md +++ b/docs/source/docfiles/markdown/TROUBLESHOOTING.md @@ -122,6 +122,15 @@ that are currently being addressed by the development team. +
+ Updating cluster in the `dev` branch after pulling files with Git LFS + + If you did not have Git LFS installed when cloning the repository and checking out the `dev` branch, + you will be missing some of the large files in the repository (e.g., ONNX models). Make sure + to install and set up Git LFS as described in the [Quickstart guide](QUICKSTART.md#restore-files-with-git-lfs). + You will also need to update your cluster with `make local`. +
+
- **Composing and running workflows:** diff --git a/resources/vm/setup_farmvibes_ai_vm.sh b/resources/vm/setup_farmvibes_ai_vm.sh index 762fa3b6..21d014ca 100755 --- a/resources/vm/setup_farmvibes_ai_vm.sh +++ b/resources/vm/setup_farmvibes_ai_vm.sh @@ -48,3 +48,8 @@ fi # Run docker without sudo sudo usermod -aG docker $DOCKER_USER + +# Run git-lfs install to restore large files +sudo apt install git-lfs -y +git lfs install +git lfs pull \ No newline at end of file From 60770faedd86cb9117987b1d10474f4caac9098e Mon Sep 17 00:00:00 2001 From: Rafael Soares Padilha Date: Wed, 27 Nov 2024 10:13:26 -0300 Subject: [PATCH 13/13] Updating url for bitnami charts repo, redis and rabbitmq image tags (#207) This PR fixes an issue in which cluster setup would fail with an `Error: could not download chart: invalid_reference: invalid tag` error message when installing rabbitmq. This is related to [this issue in bitnami repo](https://github.com/bitnami/charts/issues/30582), and fixes issue #203. This PR also updates the redis and rabbitmq image tags to the latest versions to ensure compatibility with the latest dependencies and security patches. 
--- src/vibe_core/vibe_core/cli/constants.py | 4 ++-- .../vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf | 2 +- .../vibe_core/terraform/aks/modules/kubernetes/redis.tf | 2 +- .../vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf | 2 +- .../vibe_core/terraform/local/modules/kubernetes/redis.tf | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/vibe_core/vibe_core/cli/constants.py b/src/vibe_core/vibe_core/cli/constants.py index 09d9651a..080a1f83 100644 --- a/src/vibe_core/vibe_core/cli/constants.py +++ b/src/vibe_core/vibe_core/cli/constants.py @@ -14,5 +14,5 @@ # Local constants ONNX_SUBDIR = "onnx_resources" FARMVIBES_AI_LOG_LEVEL = "DEBUG" -REDIS_IMAGE_TAG = "7.0.4-debian-11-r11" -RABBITMQ_IMAGE_TAG = "3.10.8-debian-11-r4" +REDIS_IMAGE_TAG = "7.4.1-debian-12-r2" +RABBITMQ_IMAGE_TAG = "4.0.4-debian-12-r1" diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf index 0c1ecbe0..c76010fc 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/rabbitmq.tf @@ -4,7 +4,7 @@ resource "helm_release" "rabbitmq" { name = "rabbitmq" - repository = "https://charts.bitnami.com/bitnami" + repository = "oci://registry-1.docker.io/bitnamicharts" chart = "rabbitmq" namespace = var.namespace diff --git a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf index d35ff912..936a0669 100644 --- a/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf +++ b/src/vibe_core/vibe_core/terraform/aks/modules/kubernetes/redis.tf @@ -4,7 +4,7 @@ resource "helm_release" "redis" { name = "redis" - repository = "https://charts.bitnami.com/bitnami" + repository = "oci://registry-1.docker.io/bitnamicharts" chart = "redis" namespace = var.namespace diff --git 
a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf index 35cccd5d..7fd77839 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/rabbitmq.tf @@ -4,7 +4,7 @@ resource "helm_release" "rabbitmq" { name = "rabbitmq" - repository = "https://charts.bitnami.com/bitnami" + repository = "oci://registry-1.docker.io/bitnamicharts" chart = "rabbitmq" namespace = var.namespace diff --git a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf index da2d11e3..812012cb 100644 --- a/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf +++ b/src/vibe_core/vibe_core/terraform/local/modules/kubernetes/redis.tf @@ -4,7 +4,7 @@ resource "helm_release" "redis" { name = "redis" - repository = "https://charts.bitnami.com/bitnami" + repository = "oci://registry-1.docker.io/bitnamicharts" chart = "redis" namespace = var.namespace