Skip to content

Commit

Permalink
Refactorisation et harmonisation des points d'entrée (#82)
Browse files Browse the repository at this point in the history
* first commit argo

* cross_product

* crossproduct ok en local

* ok print crossproduct in first function

* crossproduct json

* fix json format (json.dumps)

* json dump

* try raw string

* try to change items to item.format

* try json simple array

* try json simple array

* try seconds

* remplace dag by tasks in withParams

* crossproduct change sys to print

* silent output bash &> /dev/null

* silent pip install output --quiet

* item & notes

* item & notes

* one parameter for each json key

* utilise image cartiflette

* plus besoin du pip install requirements.txt

* test

* update test

* pyopen

* docker

* update

* update

* change extension

* sudo

* argo workflow

* pipeline step

* docker prod

* still pending

* update

* update

* update

* update

* complete

* test

* arg

* link

* argo pipeline

* update

* update

* update

* temp

* update

* docker

* update

* update volume

* adapting pipeline

* split tiles

* update

* add script

* update

* update

* print

* change local path

* script COMMUNE_ARRONDISSEMENT

* add log

* logger

* restriction to one field

* test subset

* simplification

* update

* simplifie

* pipeline using environment variables

* update

* add region and bassin_vie

* split

* modularise

* Full pipeline

* Full pipeline

* README

* temp

* Localdata path

* mount

* update

* force

* Refactorisation

* update

* download

* depreciation warning

* preprod

* clean

* update

* refactor and first draft API

---------

Co-authored-by: qchicherybercy <quentin.chichery@finances.gouv.fr>
  • Loading branch information
linogaliana and qchicherybercy authored Jan 17, 2024
1 parent f0a3594 commit 9a0b970
Show file tree
Hide file tree
Showing 28 changed files with 257 additions and 640 deletions.
58 changes: 58 additions & 0 deletions argo-pipeline/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""A simple API to expose cartiflette files"""
import typing
from fastapi import FastAPI, Response
from fastapi.responses import FileResponse
from cartiflette.api import download_from_cartiflette_inner

# FastAPI application exposing the cartiflette basemap-retrieval endpoints.
# Title/description are rendered as HTML in the auto-generated docs pages.
app = FastAPI(
title="API de récupération des fonds de carte avec <code>cartiflette</code>",
description="<br><br><img src=\"https://github.com/InseeFrLab/cartiflette/raw/main/cartiflette.png\" width=\"200\">"
)


@app.get("/", tags=["Welcome"])
def show_welcome_page():
    """
    Return a small landing payload for the API root.

    The payload names the service and points at the project
    documentation repository.
    """
    welcome = {
        "Message": "API cartiflette",
        "Documentation": "https://github.com/InseeFrLab/cartiflette",
    }
    return welcome


@app.get("/json", tags=["Output a JSON object"])
def download_from_cartiflette_api(
    values: typing.List[typing.Union[str, int, float]] = "11",
    borders: str = "DEPARTEMENT",
    filter_by: str = "REGION",
    simplification: typing.Optional[typing.Union[str, int, float]] = None
) -> Response:
    """
    Fetch a geographic dataset from cartiflette and return it as GeoJSON.

    Parameters
    ----------
    values : list of str/int/float
        Values used to filter the dataset on ``filter_by``. Default is
        "11" (a single code; the downstream helper accepts a scalar and
        wraps it into a list).
    borders : str
        Type of administrative borders requested (default "DEPARTEMENT").
    filter_by : str
        Field used to filter the dataset (default "REGION").
    simplification : str/int/float, optional
        Geometry simplification level forwarded to cartiflette
        (default None, i.e. no simplification requested).

    Returns
    -------
    Response
        HTTP response whose body is the GeoJSON serialization of the
        requested dataset, served as ``application/json``.
    """
    # Parameters not exposed through the endpoint are pinned to the current
    # deployment: metropolitan France, topojson source files, year 2022,
    # WGS84 output, and the preprod bucket path.
    geojsons = download_from_cartiflette_inner(
        values=values,
        borders=borders,
        filter_by=filter_by,
        territory="metropole",
        vectorfile_format="topojson",
        year=2022,
        crs=4326,
        simplification=simplification,
        provider="IGN",
        dataset_family="ADMINEXPRESS",
        source="EXPRESS-COG-CARTO-TERRITOIRE",
        return_as_json=False,
        path_within_bucket="test/preprod",
    )

    # Serialize the GeoDataFrame ourselves and return a raw Response so the
    # GeoJSON string is sent as-is (FastAPI would otherwise re-encode it as
    # a JSON string literal).
    geojson_dict = geojsons.to_json()

    return Response(
        geojson_dict,
        media_type="application/json"
    )


2 changes: 1 addition & 1 deletion argo-pipeline/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ spec:
mountPath: /mnt
env: &env_parameters
- name: PATH_WRITING_S3
value: "production"
value: "test/preprod"
- name: PYTHONPATH
value: "${PYTHONPATH}:/mnt/bin"
- name: LOCAL_DATA_PATH
Expand Down
4 changes: 2 additions & 2 deletions argo-pipeline/src/duplicate_in_bucket.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
import os

from cartiflette import BUCKET, PATH_WITHIN_BUCKET, FS
from cartiflette.config import BUCKET, PATH_WITHIN_BUCKET, FS
from cartiflette.utils import create_path_bucket
from cartiflette.pipeline.combine_adminexpress_france import (
combine_adminexpress_territory,
Expand Down Expand Up @@ -33,7 +33,7 @@


def main(path_within_bucket, localpath, bucket=BUCKET, year=year):

path_combined_files = combine_adminexpress_territory(
path_within_bucket=path_within_bucket,
intermediate_dir=localpath
Expand Down
2 changes: 1 addition & 1 deletion argo-pipeline/src/split_merge_tiles.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import argparse
from cartiflette import PATH_WITHIN_BUCKET
from cartiflette.config import PATH_WITHIN_BUCKET
from cartiflette.pipeline import (
mapshaperize_split_from_s3,
mapshaperize_merge_split_from_s3,
Expand Down
17 changes: 5 additions & 12 deletions cartiflette/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
from cartiflette.config import (
BUCKET,
PATH_WITHIN_BUCKET,
ENDPOINT_URL,
FS,
THREADS_DOWNLOAD,
LEAVE_TQDM,
)
from cartiflette.constants import REFERENCES, DOWNLOAD_PIPELINE_ARGS
from cartiflette.utils import *
from cartiflette.download import *
from cartiflette.s3 import *
from cartiflette.api import download_from_cartiflette_inner

carti_download = download_from_cartiflette_inner

__all__ = ["carti_download"]
5 changes: 5 additions & 0 deletions cartiflette/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-

from .output import download_from_cartiflette_inner

__all__ = ["download_from_cartiflette_inner"]
134 changes: 80 additions & 54 deletions cartiflette/public/output.py → cartiflette/api/output.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,74 @@
# -*- coding: utf-8 -*-
from datetime import date
import geopandas as gpd
import logging
import os
import s3fs
import shutil
import tempfile
import logging
import typing
import s3fs
import geopandas as gpd

import cartiflette
from cartiflette.download.scraper import MasterScraper
from cartiflette.utils import create_path_bucket, standardize_inputs
from cartiflette.config import BUCKET, PATH_WITHIN_BUCKET, FS

logger = logging.getLogger(__name__)


def download_from_cartiflette(
def download_from_cartiflette_inner(
values: typing.List[typing.Union[str, int, float]],
bucket: str = cartiflette.BUCKET,
path_within_bucket: str = cartiflette.PATH_WITHIN_BUCKET,
provider: str = "IGN",
dataset_family: str = "ADMINEXPRESS",
source: str = "EXPRESS-COG-TERRITOIRE",
vectorfile_format: str = "geojson",
borders: str = "COMMUNE",
filter_by: str = "region",
territory: str = "metropole",
vectorfile_format: str = "geojson",
year: typing.Union[str, int, float] = None,
crs: typing.Union[list, str, int, float] = 2154,
simplification: typing.Union[str, int, float] = None,
) -> gpd.GeoDataFrame:
bucket: str = BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
provider: str = "IGN",
dataset_family: str = "ADMINEXPRESS",
source: str = "EXPRESS-COG-TERRITOIRE",
return_as_json: bool = False
) -> typing.Union[gpd.GeoDataFrame, str]:
"""
Downloads GeoDataFrames from the Cartiflette service for specified values.
Downloads and aggregates official geographic datasets using the Cartiflette API
for a set of specified values.
Optionally returns the data as a JSON string.
This function is useful for downloading and concatenating data related to different regions,
communes, etc., into a single GeoDataFrame or JSON string.
Parameters:
- values (List[Union[str, int, float]]): A list of values to use in the 'value' parameter
when calling download_from_cartiflette_single for each iteration.
- bucket (str): The name of the S3 bucket.
- path_within_bucket (str): The path within the S3 bucket where the datasets are stored.
- provider (str): The data provider (default is "IGN").
- dataset_family (str): The dataset family (default is "ADMINEXPRESS").
- source (str): The data source (default is "EXPRESS-COG-TERRITOIRE").
- vectorfile_format (str): The file format for vector files (default is "geojson").
- borders (str): The type of borders (default is "COMMUNE").
- filter_by (str): The parameter to filter by (default is "region").
- territory (str): The territory (default is "metropole").
- year (Union[str, int, float]): The year of the dataset
(default is None, which uses the current year).
- crs (Union[list, str, int, float]): The coordinate reference system (default is 2154).
- simplification (Union[str, int, float]): The simplification parameter (default is None).
- values (List[Union[str, int, float]]):
A list of values to filter data by the filter_by parameter.
- borders (str, optional):
The type of borders (default is "COMMUNE").
- filter_by (str, optional):
The parameter to filter by (default is "region").
- territory (str, optional):
The territory (default is "metropole").
- vectorfile_format (str, optional):
The file format for vector files (default is "geojson").
- year (Union[str, int, float], optional):
The year of the dataset. Defaults to the current year if not provided.
- crs (Union[list, str, int, float], optional):
The coordinate reference system (default is 2154).
- simplification (Union[str, int, float], optional):
The simplification parameter (default is None).
- bucket, path_within_bucket, provider, dataset_family, source:
Other parameters required for accessing the Cartiflette API.
- return_as_json (bool, optional):
If True, the function returns a JSON string representation of the aggregated GeoDataFrame.
If False, it returns a GeoDataFrame. Default is False.
Returns:
- gpd.GeoDataFrame: A GeoDataFrame containing concatenated data from the Cartiflette service
for the specified values.
- Union[gpd.GeoDataFrame, str]:
A GeoDataFrame containing concatenated data from the
specified parameters if return_as_json is False.
A JSON string representation of the GeoDataFrame
if return_as_json is True.
"""

# Initialize an empty list to store individual GeoDataFrames
Expand All @@ -62,9 +78,12 @@ def download_from_cartiflette(
if not year:
year = str(date.today().year)

# Iterate over values and call download_from_cartiflette_single
if isinstance(values, (str, int)):
values = [values]

# Iterate over values
for value in values:
gdf_single = download_from_cartiflette_single(
gdf_single = download_cartiflette_single(
value=value,
bucket=bucket,
path_within_bucket=path_within_bucket,
Expand All @@ -84,12 +103,15 @@ def download_from_cartiflette(
# Concatenate the list of GeoDataFrames into a single GeoDataFrame
concatenated_gdf = gpd.pd.concat(gdf_list, ignore_index=True)

if return_as_json is True:
return concatenated_gdf.to_json()

return concatenated_gdf


def download_from_cartiflette_single(
bucket: str = cartiflette.BUCKET,
path_within_bucket: str = cartiflette.PATH_WITHIN_BUCKET,
def download_cartiflette_single(
bucket: str = BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
provider: str = "IGN",
dataset_family: str = "ADMINEXPRESS",
source: str = "EXPRESS-COG-TERRITOIRE",
Expand Down Expand Up @@ -136,9 +158,12 @@ def download_from_cartiflette_single(
return gdf


# ---------------------


def download_vectorfile_single(
bucket: str = cartiflette.BUCKET,
path_within_bucket: str = cartiflette.PATH_WITHIN_BUCKET,
bucket: str = BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
provider: str = "IGN",
dataset_family: str = "ADMINEXPRESS",
source: str = "EXPRESS-COG-TERRITOIRE",
Expand All @@ -151,7 +176,7 @@ def download_vectorfile_single(
crs: typing.Union[list, str, int, float] = 2154,
simplification: typing.Union[str, int, float] = None,
type_download: str = "https",
fs: s3fs.S3FileSystem = cartiflette.FS,
fs: s3fs.S3FileSystem = FS,
*args,
**kwargs,
) -> gpd.GeoDataFrame:
Expand All @@ -163,7 +188,7 @@ def download_vectorfile_single(
----------
bucket : str, optional
The name of the bucket where the file is stored. The default is
cartiflette.BUCKET.
cartiflette.config.BUCKET.
path_within_bucket : str, optional
The path within the bucket where the file will be stored. The default
is cartiflette.PATH_WITHIN_BUCKET.
Expand Down Expand Up @@ -249,23 +274,24 @@ def download_vectorfile_single(
)

if type_download == "bucket":

try:
fs.exists(url)
except Exception:
raise IOError(f"File has not been found at path {url} on S3")
else:
if format_read == "shp":
tdir = tempfile.TemporaryDirectory()
files = fs.ls(url)
for remote_file in files:
local_path = f"{tdir.name}/{remote_file.replace(url, '')}"
fs.download(remote_file, local_path)
local_path = f"{tdir.name}/raw.shp"

else:
tfile = tempfile.TemporaryFile()
local_path = tfile.name
if format_read == "shp":
tdir = tempfile.TemporaryDirectory()
files = fs.ls(url)
for remote_file in files:
local_path = f"{tdir.name}/{remote_file.replace(url, '')}"
fs.download(remote_file, local_path)
local_path = f"{tdir.name}/raw.shp"

else:
tfile = tempfile.TemporaryFile()
local_path = tfile.name
fs.download(remote_file, local_path)

else:
with MasterScraper(*args, **kwargs) as s:
Expand Down Expand Up @@ -307,8 +333,8 @@ def download_vectorfile_single(


def download_vectorfile_multiple(
bucket: str = cartiflette.BUCKET,
path_within_bucket: str = cartiflette.PATH_WITHIN_BUCKET,
bucket: str = BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
provider: str = "IGN",
source: str = "EXPRESS-COG-TERRITOIRE",
vectorfile_format: str = "geojson",
Expand All @@ -318,7 +344,7 @@ def download_vectorfile_multiple(
values: typing.Union[list, str, int, float] = "28",
crs: typing.Union[list, str, int, float] = 2154,
type_download: str = "https",
fs: s3fs.S3FileSystem = cartiflette.FS,
fs: s3fs.S3FileSystem = FS,
*args,
**kwargs,
) -> gpd.GeoDataFrame:
Expand All @@ -331,7 +357,7 @@ def download_vectorfile_multiple(
----------
bucket : str, optional
The name of the bucket where the file is stored. The default is
cartiflette.BUCKET.
cartiflette.config.BUCKET.
path_within_bucket : str, optional
The path within the bucket where the file will be stored. The default
is cartiflette.PATH_WITHIN_BUCKET.
Expand Down
8 changes: 4 additions & 4 deletions cartiflette/download/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import zipfile

from cartiflette.utils import import_yaml_config, hash_file, deep_dict_update
import cartiflette
from cartiflette.config import BUCKET, PATH_WITHIN_BUCKET, FS

logger = logging.getLogger(__name__)

Expand All @@ -35,9 +35,9 @@ def __init__(
year: int = None,
provider: str = "IGN",
territory: str = None,
bucket: str = cartiflette.BUCKET,
path_within_bucket: str = cartiflette.PATH_WITHIN_BUCKET,
fs: s3fs.S3FileSystem = cartiflette.FS,
bucket: str = BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
fs: s3fs.S3FileSystem = FS,
):
"""
Initialize a Dataset object.
Expand Down
2 changes: 1 addition & 1 deletion cartiflette/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import traceback
from typing import Union

from cartiflette import BUCKET, PATH_WITHIN_BUCKET, FS, THREADS_DOWNLOAD
from cartiflette.config import BUCKET, PATH_WITHIN_BUCKET, FS, THREADS_DOWNLOAD
from cartiflette.utils import (
deep_dict_update,
create_path_bucket,
Expand Down
2 changes: 1 addition & 1 deletion cartiflette/download/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from shapely.geometry import box

from cartiflette.download.dataset import Dataset
from cartiflette import REFERENCES
from cartiflette.constants import REFERENCES

logger = logging.getLogger(__name__)

Expand Down
Loading

0 comments on commit 9a0b970

Please sign in to comment.