Skip to content

Commit e417fa9

Browse files
committed
Modularisation sous forme de pipeline
1 parent 93f9fbe commit e417fa9

13 files changed

+562
-224
lines changed

cartiflette/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@
1010
from cartiflette.utils import *
1111
from cartiflette.download import *
1212
from cartiflette.s3 import *
13+
from cartiflette.pipeline import *
14+
from cartiflette.mapshaper import *

cartiflette/mapshaper/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .mapshaper_split import *
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import subprocess
2+
3+
4+
5+
# Maps an aggregation level (``niveau_agreg``) to the attribute name that
# Mapshaper's ``-split`` command is given for that level.
DICT_CORRESP_IGN = {"REGION": "INSEE_REG", "DEPARTEMENT": "INSEE_DEP"}


def mapshaperize_split(
    local_dir="temp",
    filename_initial="COMMUNE",
    extension_initial="shp",
    format_output="topojson",
    niveau_agreg="DEPARTEMENT",
    provider="IGN",
    source="EXPRESS-COG-CARTO-TERRITOIRE",
    year=2022,
    dataset_family="ADMINEXPRESS",
    territory="metropole",
    crs=4326,
    simplification=0,
    dict_corresp=DICT_CORRESP_IGN
):
    """
    Reproject, simplify and split a vector file with Mapshaper.

    The input file ``{local_dir}/{filename_initial}.{extension_initial}``
    is reprojected to ``EPSG:{crs}``, simplified, tagged with a ``SOURCE``
    attribute and split on the attribute associated with ``niveau_agreg``.
    One file per split value is written in the output directory.

    Parameters
    ----------
    local_dir : str, optional
        The local directory holding the input file, by default "temp".
    filename_initial : str, optional
        The input filename without extension, by default "COMMUNE".
    extension_initial : str, optional
        The input file extension, by default "shp".
    format_output : str, optional
        The output format, also used as output file extension,
        by default "topojson".
    niveau_agreg : str, optional
        The level of aggregation for the split, by default "DEPARTEMENT".
        Must be a key of ``dict_corresp``.
    provider : str, optional
        The data provider, by default "IGN".
    source : str or sequence of str, optional
        The data source, by default "EXPRESS-COG-CARTO-TERRITOIRE".
        When a sequence is given, its first element is used.
    year : int, optional
        The year of the data, by default 2022. Currently unused here.
    dataset_family : str, optional
        The dataset family, by default "ADMINEXPRESS". Currently unused here.
    territory : str, optional
        The territory of the data, by default "metropole".
        Currently unused here.
    crs : int, optional
        The EPSG code of the target coordinate reference system,
        by default 4326.
    simplification : int, optional
        The simplification percentage handed to ``-simplify``,
        by default 0. ``None`` is treated as 0.
    dict_corresp : dict
        A dictionary giving the correspondence between the ``niveau_agreg``
        argument and variable names.

    Returns
    -------
    str
        The output directory in which the split files are written.

    Raises
    ------
    KeyError
        If ``niveau_agreg`` is not a key of ``dict_corresp``.
    subprocess.CalledProcessError
        If the Mapshaper command exits with a non-zero status (including
        when the ``mapshaper`` executable cannot be found by the shell).
    """

    simplification_percent = simplification if simplification is not None else 0

    # A plain string must be used as a whole: indexing it with [0] would
    # keep only its first character. Sequences keep the historical
    # "first element" behaviour.
    source_name = source if isinstance(source, str) else source[0]

    # The directory name deliberately embeds the raw `simplification` value
    # (e.g. "simplification=0"), not the normalised percentage.
    output_path = f"{local_dir}/{niveau_agreg}/{format_output}/{simplification=}"

    # NOTE(review): the command is interpolated into a shell string; the
    # arguments must come from trusted configuration only.
    cmd = (
        f"mapshaper {local_dir}/{filename_initial}.{extension_initial} name='' -proj EPSG:{crs} "
        f"-simplify {simplification_percent}% "
        f"-each \"SOURCE='{provider}:{source_name}'\" "
        f"-split {dict_corresp[niveau_agreg]} "
        f"-o {output_path} format={format_output} extension=\".{format_output}\" singles"
    )

    # check=True: fail loudly instead of returning a path whose content is
    # missing or stale when Mapshaper did not complete.
    subprocess.run(cmd, shell=True, check=True)

    return output_path

cartiflette/pipeline/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .cross_product_parameters import (
2+
restructure_nested_dict_borders,
3+
crossproduct_parameters_production
4+
)
5+
6+
from .prepare_mapshaper import prepare_local_directory_mapshaper
7+
from .mapshaper_split_from_s3 import mapshaperize_split_from_s3
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import itertools
2+
import pandas as pd
3+
4+
5+
def restructure_nested_dict_borders(dict_with_list: dict):
    """
    Flatten a dictionary of lists into a list of ``[key, value]`` pairs.

    Parameters
    ----------
    dict_with_list : dict
        A dictionary whose values are iterables (typically lists).

    Returns
    -------
    list
        One ``[key, value]`` list per element of each value, in the
        iteration order of the dictionary and of each value.

    Examples
    --------
    >>> restructure_nested_dict_borders({'a': [1, 2, 3], 'b': [4, 5]})
    [['a', 1], ['a', 2], ['a', 3], ['b', 4], ['b', 5]]
    """
    flattened_pairs = []
    for border, filter_values in dict_with_list.items():
        # Pair the current key with each of its values.
        flattened_pairs.extend([border, value] for value in filter_values)
    return flattened_pairs
36+
37+
import itertools
38+
import pandas as pd
39+
40+
def crossproduct_parameters_production(
    croisement_filter_by_borders: dict,
    list_format: list,
    years: list,
    crs_list: list,
    sources: list,
    simplifications: list
) -> pd.DataFrame:
    """
    Build the table of all parameter combinations to produce.

    Parameters
    ----------
    croisement_filter_by_borders : dict
        A dictionary mapping each ``borders`` value to the list of
        ``filter_by`` values it must be crossed with.
    list_format : list
        Output formats.
    years : list
        Years.
    crs_list : list
        CRS (Coordinate Reference System) codes.
    sources : list
        Sources.
    simplifications : list
        Simplification levels.

    Returns
    -------
    pd.DataFrame
        One row per combination, with columns ``format``, ``year``,
        ``crs``, ``source``, ``simplification``, ``borders`` and
        ``filter_by`` (in that order). Rows follow the order of
        ``itertools.product`` over formats, (borders, filter_by) pairs,
        years, CRS, sources and simplifications.

    Examples
    --------
    >>> result = crossproduct_parameters_production(
    ...     {'a': [1, 2, 3], 'b': [4, 5]},
    ...     ['geojson', 'gpkg'],
    ...     [2021, 2022],
    ...     [4326, 2154],
    ...     ['source1', 'source2'],
    ...     [0, 40],
    ... )
    >>> len(result)
    160
    """
    # Flatten {borders: [filter_by, ...]} into (borders, filter_by) pairs.
    border_filter_pairs = [
        (border, filter_value)
        for border, filter_values in croisement_filter_by_borders.items()
        for filter_value in filter_values
    ]

    parameter_grid = itertools.product(
        list_format,
        border_filter_pairs,
        years,
        crs_list,
        sources,
        simplifications,
    )

    # Unpack each pair while building the rows so the frame is created
    # directly with its final columns.
    rows = [
        (fmt, year, crs, source, simplification, border, filter_value)
        for fmt, (border, filter_value), year, crs, source, simplification
        in parameter_grid
    ]

    return pd.DataFrame(
        rows,
        columns=[
            "format",
            "year",
            "crs",
            "source",
            "simplification",
            "borders",
            "filter_by",
        ],
    )
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
2+
import os
3+
4+
from cartiflette.config import BUCKET, PATH_WITHIN_BUCKET, FS
5+
from cartiflette.utils import create_path_bucket
6+
from cartiflette.mapshaper import mapshaperize_split
7+
from .prepare_mapshaper import prepare_local_directory_mapshaper
8+
9+
def mapshaperize_split_from_s3(
    path_bucket,
    config,
    fs=FS
):
    """
    Split raw files with Mapshaper and send the results to the bucket.

    The local working directory is prepared with
    ``prepare_local_directory_mapshaper``, the file named after ``borders``
    is split with ``mapshaperize_split``, and every file found in the
    output directory is sent with ``fs.put`` to the path computed by
    ``create_path_bucket``.

    Parameters
    ----------
    path_bucket : str
        Path handed to ``prepare_local_directory_mapshaper`` to locate the
        raw files.
    config : dict
        Processing options. Recognised keys, with their defaults:
        ``format_output`` ("topojson"), ``filter_by`` ("DEPARTEMENT"),
        ``borders`` ("COMMUNE"), ``territory`` ("metropole"),
        ``provider`` ("IGN"), ``source`` ("EXPRESS-COG-CARTO-TERRITOIRE"),
        ``year`` (2022), ``dataset_family`` ("ADMINEXPRESS"),
        ``crs`` (4326), ``simplification`` (0), ``bucket`` (``BUCKET``),
        ``path_within_bucket`` (``PATH_WITHIN_BUCKET``) and
        ``local_dir`` ("temp").
    fs : FileSystem, optional
        The file system object used for the transfers, by default ``FS``.

    Returns
    -------
    str
        The local directory containing the split files.
    """

    format_output = config.get("format_output", "topojson")
    filter_by = config.get("filter_by", "DEPARTEMENT")
    borders = config.get("borders", "COMMUNE")
    territory = config.get("territory", "metropole")

    provider = config.get("provider", "IGN")
    source = config.get("source", "EXPRESS-COG-CARTO-TERRITOIRE")
    year = config.get("year", 2022)
    dataset_family = config.get("dataset_family", "ADMINEXPRESS")
    crs = config.get("crs", 4326)
    simplification = config.get("simplification", 0)

    bucket = config.get("bucket", BUCKET)
    path_within_bucket = config.get("path_within_bucket", PATH_WITHIN_BUCKET)
    local_dir = config.get("local_dir", "temp")

    # Called for its side effects only (local directories and raw files);
    # the returned paths are not needed here.
    prepare_local_directory_mapshaper(
        path_bucket,
        borders=borders,
        niveau_agreg=filter_by,
        format_output=format_output,
        simplification=simplification,
        local_dir=local_dir,
        fs=fs
    )

    output_path = mapshaperize_split(
        local_dir=local_dir,
        filename_initial=borders,
        extension_initial="shp",
        format_output=format_output,
        niveau_agreg=filter_by,
        provider=provider,
        source=source,
        year=year,
        dataset_family=dataset_family,
        territory=territory,
        crs=crs,
        simplification=simplification
    )

    extension = f".{format_output}"

    for filename in os.listdir(output_path):
        # Strip the extension only when it is the actual suffix, so that a
        # name containing the same text elsewhere is left intact.
        if filename.endswith(extension):
            value = filename[: -len(extension)]
        else:
            value = filename

        path_s3 = create_path_bucket(
            {
                "bucket": bucket,
                "path_within_bucket": path_within_bucket,
                "year": year,
                "borders": borders,
                "crs": crs,
                "filter_by": filter_by,
                "value": value,
                "vectorfile_format": format_output,
                "provider": provider,
                "dataset_family": dataset_family,
                "source": source,
                "territory": territory,
                "simplification": simplification
            }
        )
        fs.put(f"{output_path}/{filename}", path_s3)

    return output_path
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import os
2+
3+
from cartiflette.config import FS
4+
from cartiflette.s3 import list_raw_files_level, download_files_from_list
5+
6+
7+
def prepare_local_directory_mapshaper(
    path_bucket,
    borders="COMMUNE",
    niveau_agreg="DEPARTEMENT",
    format_output="topojson",
    simplification=0,
    local_dir="temp",
    fs=FS,
):
    """
    Set up the local working tree used by the Mapshaper step.

    Creates ``local_dir``, lists the raw files for ``borders`` under
    ``path_bucket`` and downloads them, then creates the destination
    directory
    ``{local_dir}/{niveau_agreg}/{format_output}/simplification={simplification}``.

    Parameters
    ----------
    path_bucket : str
        The path to the bucket in the file system.
    borders : str, optional
        The type of borders, by default "COMMUNE".
    niveau_agreg : str, optional
        The level of aggregation, by default "DEPARTEMENT".
    format_output : str, optional
        The output format, by default "topojson".
    simplification : int, optional
        The degree of simplification, by default 0.
    local_dir : str, optional
        The local directory for file storage, by default "temp".
    fs : FileSystem, optional
        The file system object, by default ``FS``.

    Returns
    -------
    dict
        ``{"path_origin": local_dir, "path_destination": <destination>}``.
    """
    os.makedirs(local_dir, exist_ok=True)

    # Fetch every raw file available for the requested borders.
    # NOTE(review): `local_dir` is not forwarded to
    # `download_files_from_list` - confirm the files land in `local_dir`
    # when a non-default directory is used.
    raw_files = list_raw_files_level(fs, path_bucket, borders=borders)
    download_files_from_list(fs, raw_files)

    # `{simplification=}` renders as e.g. "simplification=0".
    destination = f"{local_dir}/{niveau_agreg}/{format_output}/{simplification=}"
    os.makedirs(destination, exist_ok=True)

    return {"path_origin": local_dir, "path_destination": destination}

cartiflette/s3/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
1-
from cartiflette.s3.s3 import (
1+
from .s3 import (
22
write_vectorfile_s3_all,
33
write_vectorfile_s3_custom_arrondissement,
44
production_cartiflette,
55
list_produced_cartiflette,
66
write_cog_s3,
77
)
88

9+
from .upload_raw_s3 import *
10+
from .list_files_s3 import *
11+
12+
913
# Names exported by `from cartiflette.s3 import *`.
__all__ = [
    "write_vectorfile_s3_all",
    "write_vectorfile_s3_custom_arrondissement",
    "production_cartiflette",
    "list_produced_cartiflette",
    "upload_s3_raw",
    "download_files_from_list",
    "list_raw_files_level",
    "write_cog_s3",
]

0 commit comments

Comments
 (0)