diff --git a/argo-pipeline/pipeline.yaml b/argo-pipeline/pipeline.yaml index edc32ae..e6ecbff 100644 --- a/argo-pipeline/pipeline.yaml +++ b/argo-pipeline/pipeline.yaml @@ -110,7 +110,7 @@ spec: command: [sh, -c] args: ["mkdir -p /mnt/bin/src ; mv /mnt/bin/argo-pipeline/src/* /mnt/bin/src ; - echo $PATH_WRITING_S3; + echo $ENVIRONMENT; "] volumeMounts: - name: volume-workflow-tmp @@ -134,11 +134,10 @@ spec: value: minio.lab.sspcloud.fr - name: MC_HOST_s3 value: https://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY@$AWS_S3_ENDPOINT - - name: PATH_WRITING_S3 - value: "test" - name: ENVIRONMENT - # set value to "dev" to simplify pipeline execution (2 years, only topojson, etc.), use "preprod" or "prod" else - value: dev + # set value to "test" to simplify pipeline execution (2 years, only topojson, etc.); otherwise use "preprod" or "prod" + # -> this value is also used as the PATH_WITHIN_BUCKET constant + value: test - name: download-all-sources outputs: @@ -150,7 +149,7 @@ spec: image: inseefrlab/cartiflette:latest command: [sh, -c] args: [" - python /mnt/bin/src/download_all_sources.py --path $PATH_WRITING_S3; + python /mnt/bin/src/download_all_sources.py; "] volumeMounts: - name: volume-workflow-tmp @@ -196,7 +195,7 @@ spec: - name: volume-workflow-tmp mountPath: /mnt args: [" - python /mnt/bin/src/make_geodata_datasets.py --path $PATH_WRITING_S3 --year '{{inputs.parameters.year}}'; + python /mnt/bin/src/make_geodata_datasets.py --year '{{inputs.parameters.year}}'; "] env: *env_parameters @@ -216,7 +215,7 @@ spec: - name: volume-workflow-tmp mountPath: /mnt args: [" - python /mnt/bin/src/make_metadata_datasets.py --path $PATH_WRITING_S3 --years '{{inputs.parameters.years}}'; + python /mnt/bin/src/make_metadata_datasets.py --years '{{inputs.parameters.years}}'; "] env: *env_parameters diff --git a/argo-pipeline/src/catalog.py b/argo-pipeline/src/catalog.py index 73cc7fc..1b91bfe 100644 --- a/argo-pipeline/src/catalog.py +++ b/argo-pipeline/src/catalog.py @@ -5,7 +5,6 @@ Create 
cartiflette's catalog """ -import json import logging from s3fs import S3FileSystem diff --git a/argo-pipeline/src/download_all_sources.py b/argo-pipeline/src/download_all_sources.py index dcffd2a..8bbc61c 100644 --- a/argo-pipeline/src/download_all_sources.py +++ b/argo-pipeline/src/download_all_sources.py @@ -41,9 +41,6 @@ parser = argparse.ArgumentParser( description="Run Cartiflette pipeline download script." ) -parser.add_argument( - "-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET -) default_years = ",".join(str(x) for x in range(2020, date.today().year + 1)) parser.add_argument( @@ -63,12 +60,13 @@ args = parser.parse_args() bucket = BUCKET -path_within_bucket = args.path years = args.years skip = args.skip -if os.environ.get("ENVIRONMENT", None) == "dev": - logging.warning("dev environment -> restrict download to 2023 & 2024 only") +if os.environ.get("ENVIRONMENT", None) == "test": + logging.warning( + "test environment -> restrict download to 2023 & 2024 only" + ) years = "2023,2024" if years: @@ -80,7 +78,7 @@ try: if not skip: results = download_all( - bucket, path_within_bucket, fs=fs, upload=True, years=years + bucket, PATH_WITHIN_BUCKET, fs=fs, upload=True, years=years ) else: results = dict() diff --git a/argo-pipeline/src/filter_vintages_operationnal.py b/argo-pipeline/src/filter_vintages_operationnal.py index 8993408..f7daf74 100644 --- a/argo-pipeline/src/filter_vintages_operationnal.py +++ b/argo-pipeline/src/filter_vintages_operationnal.py @@ -57,8 +57,8 @@ years = sorted(list(years_geodatasets | years_metadata)) -if os.environ.get("ENVIRONMENT", None) == "dev": - logging.warning("dev environment -> restrict generation to 2023, 2024 ") +if os.environ.get("ENVIRONMENT", None) == "test": + logging.warning("test environment -> restrict generation to 2023, 2024 ") years = [2023, 2024] logger.info( diff --git a/argo-pipeline/src/make_geodata_datasets.py b/argo-pipeline/src/make_geodata_datasets.py index c458436..77bc38c 100644 
--- a/argo-pipeline/src/make_geodata_datasets.py +++ b/argo-pipeline/src/make_geodata_datasets.py @@ -37,10 +37,6 @@ parser = argparse.ArgumentParser( description="Preprocess geodatasets from raw sources" ) -parser.add_argument( - "-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET -) - parser.add_argument( "-y", "--year", help="Vintage to perform computation on", default="2023" ) @@ -54,7 +50,6 @@ # Parse arguments args = parser.parse_args() -path_within_bucket = args.path year = args.year simplifications = args.simplify @@ -63,10 +58,10 @@ def main( - path_within_bucket, simplifications: List[int], bucket=BUCKET, year: int = None, + path_within_bucket: str = PATH_WITHIN_BUCKET, ): created = create_one_year_geodataset_batch( @@ -90,4 +85,4 @@ def main( if __name__ == "__main__": - data = main(path_within_bucket, simplifications=simplifications, year=year) + data = main(simplifications=simplifications, year=year) diff --git a/argo-pipeline/src/make_metadata_datasets.py b/argo-pipeline/src/make_metadata_datasets.py index 0796bf6..bd0079c 100644 --- a/argo-pipeline/src/make_metadata_datasets.py +++ b/argo-pipeline/src/make_metadata_datasets.py @@ -29,9 +29,6 @@ parser = argparse.ArgumentParser( description="Preprocess metadata from raw sources" ) -parser.add_argument( - "-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET -) parser.add_argument( "-y", "--years", help="Vintage to perform computation on", default="[]" @@ -41,7 +38,6 @@ args = parser.parse_args() bucket = BUCKET -path_within_bucket = args.path years = args.years years = json.loads(years) @@ -50,8 +46,8 @@ def main( - path_within_bucket, - bucket=BUCKET, + path_within_bucket: str = PATH_WITHIN_BUCKET, + bucket: str = BUCKET, years: int = None, ): @@ -125,4 +121,4 @@ def main( if __name__ == "__main__": - data = main(path_within_bucket, years=years) + data = main(years=years) diff --git a/argo-pipeline/src/select_downstream_vintage_to_process.py 
b/argo-pipeline/src/select_downstream_vintage_to_process.py index 871505c..b29ed0f 100644 --- a/argo-pipeline/src/select_downstream_vintage_to_process.py +++ b/argo-pipeline/src/select_downstream_vintage_to_process.py @@ -42,8 +42,8 @@ # {"IGN": {"ADMINEXPRESS": {"EXPRESS-COG-TERRITOIRE": {"guadeloupe": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=5490/origin=raw/vectorfile_format=shp/territory=guadeloupe/simplification=0/COMMUNE.shp"]}}}, "martinique": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=5490/origin=raw/vectorfile_format=shp/territory=martinique/simplification=0/COMMUNE.shp"]}}}, "guyane": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2972/origin=raw/vectorfile_format=shp/territory=guyane/simplification=0/COMMUNE.shp"]}}}, "reunion": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2975/origin=raw/vectorfile_format=shp/territory=reunion/simplification=0/COMMUNE.shp"]}}}, "mayotte": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=4326/origin=raw/vectorfile_format=shp/territory=mayotte/simplification=0/COMMUNE.shp"]}}}, "metropole": {"2024": {"downloaded": true, "paths": {"COMMUNE": 
["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2154/origin=raw/vectorfile_format=shp/territory=metropole/simplification=0/COMMUNE.shp"]}}}}}}, "Insee": {"COG": {"DEPARTEMENT": {"france_entiere": {"2024": {"downloaded": false, "paths": null}}}, "REGION": {"france_entiere": {"2024": {"downloaded": false, "paths": null}}}}, "TAGC": {"APPARTENANCE": {"france_entiere": {"2024": {"downloaded": true, "paths": {"table-appartenance-geo-communes-2024": ["projet-cartiflette/test/provider=Insee/dataset_family=TAGC/source=APPARTENANCE/year=2024/administrative_level=None/crs=None/origin=raw/vectorfile_format=xlsx/territory=france_entiere/simplification=0/table-appartenance-geo-communes-2024.xlsx"]}}}}}}} -if os.environ.get("ENVIRONMENT", None) == "dev": - logging.warning("dev environment -> force generation of only 2023 & 2024") +if os.environ.get("ENVIRONMENT", None) == "test": + logging.warning("test environment -> force generation of only 2023 & 2024") def store_to_json(name, years): @@ -55,7 +55,7 @@ def store_to_json(name, years): def filter_geodata(results): "filter the downloaded vintages of geodatasets" - if os.environ.get("ENVIRONMENT", None) == "dev": + if os.environ.get("ENVIRONMENT", None) == "test": return store_to_json("geodatasets_years.json", [2023, 2024]) years = set() @@ -81,7 +81,7 @@ def filter_geodata(results): def filter_metadata(results): "filter the downloaded vintages of metadatasets" - if os.environ.get("ENVIRONMENT", None) == "dev": + if os.environ.get("ENVIRONMENT", None) == "test": return store_to_json("metadata_years.json", [2023, 2024]) years = set() diff --git a/cartiflette/config.py b/cartiflette/config.py index 48024e5..3a60aa2 100644 --- a/cartiflette/config.py +++ b/cartiflette/config.py @@ -7,7 +7,7 @@ load_dotenv(override=True) BUCKET = "projet-cartiflette" -PATH_WITHIN_BUCKET = "test" +PATH_WITHIN_BUCKET = os.environ.get("ENVIRONMENT", "test") 
ENDPOINT_URL = "https://minio.lab.sspcloud.fr" kwargs = {} @@ -31,13 +31,13 @@ # PIPELINE CONFIG # ============================================================================= -# set to low resolution datasets for dev environment, high for anything else +# use low-resolution datasets in the test environment, high-resolution ones in any other environment INTERMEDIATE_FORMAT = "geojson" -DATASETS_HIGH_RESOLUTION = os.environ.get("ENVIRONMENT", "dev") != "dev" -MAPSHAPER_QUIET = os.environ.get("ENVIRONMENT", "dev") != "dev" +DATASETS_HIGH_RESOLUTION = os.environ.get("ENVIRONMENT", "test") != "test" +MAPSHAPER_QUIET = os.environ.get("ENVIRONMENT", "test") != "test" if not DATASETS_HIGH_RESOLUTION: warnings.warn( - "cartiflette is running with dev configuration, using only low " + "cartiflette is running with test configuration, using only low " "resolution datasets" ) diff --git a/cartiflette/pipeline_constants.py b/cartiflette/pipeline_constants.py index 7885178..e3347d8 100644 --- a/cartiflette/pipeline_constants.py +++ b/cartiflette/pipeline_constants.py @@ -79,7 +79,7 @@ "POPULATION-COM": ["Insee", "POPULATION", "POPULATION-IRIS-COM"], } -if os.environ.get("ENVIRONMENT", "dev") != "dev": +if os.environ.get("ENVIRONMENT", "test") != "test": PIPELINE_CRS = [2154, 4326, 3857] PIPELINE_SIMPLIFICATION_LEVELS = [100, 40] PIPELINE_FORMATS = ["geojson", "topojson", "gpkg"]