diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 9b9dcba0..2df0dd2c 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -12,105 +12,104 @@ on:
       - main
       - staging-*
 
 env:
-  nexus_server: 10.128.81.69:8082
+  nexus_server: docker-registry.ign.fr
 
 jobs:
   build_test_deploy:
     runs-on: self-hosted
     steps:
-      - name: Checkout branch
-        uses: actions/checkout@v2
-
-      - name: replace BD_UNI credentials
-        run: |
-          cp configs/bd_uni_connection_params/credentials_template.yaml configs/bd_uni_connection_params/credentials.yaml
-          sed -i '/user:/c\user: invite' configs/bd_uni_connection_params/credentials.yaml
-          sed -i '/pwd:/c\pwd: ${{ secrets.PASSWORD_BD_UNI }}' configs/bd_uni_connection_params/credentials.yaml
-
-      - name: build docker image
-        run: docker build -t lidar_prod .
-
-      - name: Check code neatness (linter)
-        run: docker run lidar_prod flake8
-
-      - name: Run tests & get coverage - fast ones go first.
-        run: >
-          docker run --network host
-          lidar_prod
-          python -m
-          pytest -rA -v -m "not slow" --ignore=actions-runner
-
-      - name: Run slow tests last (evaluation on large file)
-        run: >
-          docker run --network host
-          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/evaluation/:/lidar/tests/files/large/
-          lidar_prod
-          python -m
-          pytest -rA -v -m "slow" --ignore=actions-runner --no-cov
-
-      - name: Test building module from CLI on a LAS subset.
-        run: >
-          docker run --network host
-          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/:/inputs/
-          -v /var/data/cicd/CICD_github_assets/M11.1/outputs/:/outputs/
-          lidar_prod
-          python
-          lidar_prod/run.py
-          +task=apply_on_building
-          paths.src_las=/inputs/Semis_2021_0937_6537_LA93_IGN69.150mx100m.for_full_building_module.las
-          paths.output_dir=/outputs/
-
-      - name: Test vegetation/unclassified detection from CLI on a LAS subset.
-        run: >
-          docker run
-          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/:/inputs/
-          -v /var/data/cicd/CICD_github_assets/M11.1/outputs/:/outputs/
-          lidar_prod
-          python
-          lidar_prod/run.py
-          +task=identify_vegetation_unclassified
-          data_format=vegetation_unclassified.yaml
-          paths.src_las=/inputs/888000_6614000.subset.las
-          paths.output_dir=/outputs/
-
-      - name: Tag the docker image with branch name
-        if: github.event_name == 'push'
-        run: |
-          docker tag lidar_prod:latest lidar_prod:${{github.ref_name}}
-          docker run lidar_prod:${{github.ref_name}} bash # Dry run image so that is it not prunned
-          # docker save lidar_prod:${{github.ref_name}} -o /var/data/cicd/CICD_github_assets/CICD_docker_images/lidar_prod_${{github.ref_name}}.tar # This needs writing rights to the mounted path
-
-      # get version number and date, to tag the image pushed to nexus
-      - name: Get version number
-        id: tag
-        run: |
-          echo "::set-output name=version::$(docker run lidar_prod grep '__version__' package_metadata.yaml| cut -d\" -f2)"
-          echo "::set-output name=date::$(date '+%Y.%m.%d')"
-
-      # show possible tags, for debugging purpose
-      - name: Print tag
-        run: |
-          echo "${{steps.tag.outputs.version}}"
-          echo "${{steps.tag.outputs.date}}"
-
-      - name: push main docker on nexus (tagged with a date)
-        # we push on nexus an image from the main branch when it has been updated (push or accepted pull request)
-        if: ((github.ref_name == 'main') && (github.event_name == 'push'))
-        run: |
-          docker tag lidar_prod $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{steps.tag.outputs.date}}
-          docker login $nexus_server --username svc_lidarhd --password ${{ secrets.PASSWORD_SVC_LIDARHD }}
-          docker push $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{steps.tag.outputs.date}}
-
-      - name: push branch docker on nexus (tagged with the branch name)
-        # we push on nexus an image from a branch when it's pushed
-        if: ((github.event_name == 'push') && (github.ref_name != 'main'))
-        run: |
-          docker tag lidar_prod $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{github.ref_name}}
-          docker login $nexus_server --username svc_lidarhd --password ${{ secrets.PASSWORD_SVC_LIDARHD }}
-          docker push $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{github.ref_name}}
-
-      - name: Clean dangling docker images
-        if: always() # always do it, even if something failed
-        run: docker system prune --force # remove dangling docker images, without asking user for confirmation
+      - name: Checkout branch
+        uses: actions/checkout@v4
+
+      - name: replace BD_UNI credentials
+        run: |
+          cp configs/bd_uni_connection_params/credentials_template.yaml configs/bd_uni_connection_params/credentials.yaml
+          sed -i '/user:/c\user: invite' configs/bd_uni_connection_params/credentials.yaml
+          sed -i '/pwd:/c\pwd: ${{ secrets.PASSWORD_BD_UNI }}' configs/bd_uni_connection_params/credentials.yaml
+
+      - name: build docker image
+        run: docker build --build-arg http_proxy=http://proxy.ign.fr:3128/ --build-arg https_proxy=http://proxy.ign.fr:3128/ -t lidar_prod .
+
+      - name: Check code neatness (linter)
+        run: docker run lidar_prod flake8
+
+      - name: Run tests & get coverage - fast ones go first.
+        run: >
+          docker run --network host
+          lidar_prod
+          python -m
+          pytest -rA -vv -m "not slow" --ignore=actions-runner
+
+      - name: Run slow tests last (evaluation on large file)
+        run: >
+          docker run --network host
+          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/evaluation/:/lidar/tests/files/large/
+          lidar_prod
+          python -m
+          pytest -rA -v -m "slow" --ignore=actions-runner --no-cov
+
+      - name: Test building module from CLI on a LAS subset.
+        run: >
+          docker run --network host
+          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/:/inputs/
+          -v /var/data/cicd/CICD_github_assets/M11.1/outputs/:/outputs/
+          lidar_prod
+          python
+          lidar_prod/run.py
+          +task=apply_on_building
+          paths.src_las=/inputs/Semis_2021_0937_6537_LA93_IGN69.150mx100m.for_full_building_module.las
+          paths.output_dir=/outputs/
+
+      - name: Test vegetation/unclassified detection from CLI on a LAS subset.
+        run: >
+          docker run
+          -v /var/data/cicd/CICD_github_assets/M11.1/inputs/:/inputs/
+          -v /var/data/cicd/CICD_github_assets/M11.1/outputs/:/outputs/
+          lidar_prod
+          python
+          lidar_prod/run.py
+          +task=identify_vegetation_unclassified
+          data_format=vegetation_unclassified.yaml
+          paths.src_las=/inputs/888000_6614000.subset.las
+          paths.output_dir=/outputs/
+
+      - name: Tag the docker image with branch name
+        if: github.event_name == 'push'
+        run: |
+          docker tag lidar_prod:latest lidar_prod:${{github.ref_name}}
+          docker run lidar_prod:${{github.ref_name}} bash # Dry run image so that it is not pruned
+          # docker save lidar_prod:${{github.ref_name}} -o /var/data/cicd/CICD_github_assets/CICD_docker_images/lidar_prod_${{github.ref_name}}.tar # This needs writing rights to the mounted path
+
+      # get version number and date, to tag the image pushed to nexus
+      - name: Get version number
+        id: tag
+        run: |
+          echo "::set-output name=version::$(docker run lidar_prod grep '__version__' package_metadata.yaml | cut -d\" -f2)"
+          echo "::set-output name=date::$(date '+%Y.%m.%d')"
+
+      # show possible tags, for debugging purposes
+      - name: Print tag
+        run: |
+          echo "${{steps.tag.outputs.version}}"
+          echo "${{steps.tag.outputs.date}}"
+
+      - name: push main docker on nexus (tagged with a date)
+        # we push on nexus an image from the main branch when it has been updated (push or accepted pull request)
+        if: ((github.ref_name == 'main') && (github.event_name == 'push'))
+        run: |
+          docker tag lidar_prod $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{steps.tag.outputs.date}}
+          docker login $nexus_server --username svc_lidarhd --password ${{ secrets.PASSWORD_SVC_LIDARHD }}
+          docker push $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{steps.tag.outputs.date}}
+
+      - name: push branch docker on nexus (tagged with the branch name)
+        # we push on nexus an image from a branch when it's pushed
+        if: ((github.event_name == 'push') && (github.ref_name != 'main'))
+        run: |
+          docker tag lidar_prod $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{github.ref_name}}
+          docker login $nexus_server --username svc_lidarhd --password ${{ secrets.PASSWORD_SVC_LIDARHD }}
+          docker push $nexus_server/lidar_hd/lidar_prod:${{steps.tag.outputs.version}}-${{github.ref_name}}
+
+      - name: Clean dangling docker images
+        if: always() # always do it, even if something failed
+        run: docker system prune --force # remove dangling docker images, without asking user for confirmation
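For reference, the `Get version number` step above shells out to `grep` and `cut` to pull `__version__` out of `package_metadata.yaml`, then combines it with the current date to tag the pushed image. A hedged Python equivalent of the value it extracts (assuming PyYAML is available, which the workflow step itself does not require):

```python
# Hedged sketch of what the "Get version number" step extracts, equivalent to
# `grep '__version__' package_metadata.yaml | cut -d'"' -f2`.
# Assumption: PyYAML is importable; the CI step itself relies only on grep/cut.
import yaml

with open("package_metadata.yaml") as f:
    version = yaml.safe_load(f)["__version__"]

print(version)  # "V1.10.0" after this changeset
# The pushed image then becomes e.g. lidar_hd/lidar_prod:V1.10.0-2024.03.15
# (the date part is illustrative).
```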
diff --git a/.github/workflows/gh-pages.yaml b/.github/workflows/gh-pages.yaml
index 6dcf4bef..fbe2eb84 100644
--- a/.github/workflows/gh-pages.yaml
+++ b/.github/workflows/gh-pages.yaml
@@ -5,10 +5,9 @@ name: "Documentation Build"
 on:
   push:
     branches:
-      - main # <- only on main branch
+      - main # <- only on main branch
 
 jobs:
-
   build-and-deploy:
     runs-on: ubuntu-latest
 
@@ -20,7 +19,7 @@ jobs:
     steps:
       # Checkout the repository
       - name: "Checkout"
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       # See https://github.com/conda-incubator/setup-miniconda#caching-environments
 
@@ -28,12 +27,12 @@ jobs:
       - name: Setup a conda-incubator with an empty conda env
         uses: conda-incubator/setup-miniconda@v2
         with:
-          python-version: 3.9.12
-          miniforge-variant: Mambaforge
-          miniforge-version: latest
-          use-mamba: true
-          # Environment to create and activate for next steps
-          activate-environment: lidar_prod
+          python-version: 3.9.12
+          miniforge-variant: Mambaforge
+          miniforge-version: latest
+          use-mamba: true
+          # Environment to create and activate for next steps
+          activate-environment: lidar_prod
 
       # Cache the env
       # See https://github.com/conda-incubator/setup-miniconda#caching-environments
@@ -66,5 +65,5 @@ jobs:
       - name: "Deploy Github Pages"
         uses: JamesIves/github-pages-deploy-action@3.7.1
         with:
-          BRANCH: gh-pages # <- Branch where generated doc files will be commited
-          FOLDER: ./docs/build/html/ # <- Dir where .nojekyll is created and from which to deploy github pages.
+          BRANCH: gh-pages # <- Branch where generated doc files will be committed
+          FOLDER: ./docs/build/html/ # <- Dir where .nojekyll is created and from which to deploy github pages.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dad27b33..407ae4ee 100755
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.6.0
    hooks:
       # list of supported hooks: https://pre-commit.com/hooks.html
       - id: trailing-whitespace
@@ -12,26 +12,26 @@ repos:
 
   # python code formatting
   - repo: https://github.com/psf/black
-    rev: 20.8b1
+    rev: 24.3.0
     hooks:
       - id: black
         args: [--line-length, "99"]
 
   # python import sorting
   - repo: https://github.com/PyCQA/isort
-    rev: 5.8.0
+    rev: 5.13.2
     hooks:
       - id: isort
 
   # yaml formatting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.3.0
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
        types: [yaml]
 
   # python code analysis
   - repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
+    rev: 7.0.0
     hooks:
       - id: flake8
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fcdb01e1..b0e57d4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main
 
+## 1.10.0
+- Add support for EPSG references other than 2154
+
 ### 1.9.14
 - Be robust to pgsql2shp warnings when dealing with empty tables (i.e. no buildings).
diff --git a/Dockerfile b/Dockerfile
index 5f8ba5eb..b0258a64 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,12 @@
-FROM mambaorg/micromamba:latest
-
-# set the IGN proxy, otherwise apt-get and other applications don't work
-# from within our self-hoster action runner
-ENV http_proxy 'http://192.168.4.9:3128/'
-ENV https_proxy 'http://192.168.4.9:3128/'
+# Fix mambaorg/micromamba tag (latest was not updated on the ci computer)
+FROM mambaorg/micromamba:bookworm-slim
 
 # all the apt-get installs
 USER root
-RUN apt-get update && apt-get upgrade -y && apt-get install -y \
-    software-properties-common \
-    wget \
-    git \
-    postgis
+
+RUN apt-get update \
+    && apt-get upgrade -y \
+    && apt-get install -y postgis>=3.3.0
 
 # Only copy necessary files to set up the environment, in order
 # to use docker caching if requirements files were not updated.
@@ -20,7 +15,7 @@ WORKDIR /tmp
 COPY ./setup_env/ .
 
 # install the python packages via anaconda
-RUN micromamba create --yes --file /tmp/requirements.yml
+RUN micromamba create --file /tmp/requirements.yml
 
 # Sets the environment name (since it is not named "base")
 # This ensures that env is activated when using "docker run ..."
@@ -33,7 +28,7 @@ RUN micromamba list
 RUN echo "Make sure pdal is installed:"
 RUN python -c "import pdal"
 
-# /lidar becomes the working directory, where the repo content
+# /lidar becomes the working directory, where the repo content
 # (the context of this Dockerfile) is copied.
 WORKDIR /lidar
 COPY . .
diff --git a/configs/data_format/default.yaml b/configs/data_format/default.yaml
index 27e5779f..179400b0 100644
--- a/configs/data_format/default.yaml
+++ b/configs/data_format/default.yaml
@@ -1,3 +1,6 @@
+# EPSG code to override the las spatial reference
+epsg: 2154
+
 # Those names connect the logics between successive tasks
 las_dimensions:
   # input
@@ -18,7 +21,7 @@ las_dimensions:
   # Intermediary channels
   cluster_id: ClusterID # pdal-defined -> created by clustering operations
   uni_db_overlay: BDTopoOverlay # user-defined -> a 0/1 flag for presence of a BDUni vector
-  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rule-based classification
+  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rule-based classification
   ClusterID_candidate_building: CID_CandidateB # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
   ClusterID_confirmed_or_high_proba: CID_IsolatedOrConfirmed # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
   completion_non_candidate_flag: F_NonCandidateCompletion # --> a 0/1 flag for non candidates points with high proba and close to confirmed buildings
diff --git a/configs/data_format/vegetation_unclassified.yaml b/configs/data_format/vegetation_unclassified.yaml
index 26cc131f..e03e52df 100644
--- a/configs/data_format/vegetation_unclassified.yaml
+++ b/configs/data_format/vegetation_unclassified.yaml
@@ -1,3 +1,6 @@
+# EPSG code to override the las spatial reference
+epsg: 2154
+
 # Those names connect the logics between successive tasks
 las_dimensions:
   # input
@@ -14,7 +17,7 @@ las_dimensions:
   # Intermediary channels
   cluster_id: ClusterID # pdal-defined -> created by clustering operations
   uni_db_overlay: BDTopoOverlay # user-defined -> a 0/1 flag for presence of a BDUni vector
-  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rule-based classification
+  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rule-based classification
   ClusterID_candidate_building: CID_CandidateB # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
   ClusterID_confirmed_or_high_proba: CID_IsolatedOrConfirmed # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
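The new `epsg: 2154` key in both data_format configs ultimately feeds the PDAL LAS reader's SRS override (see `get_pdal_reader` in `lidar_prod/tasks/utils.py` further below). A minimal sketch of the reader it produces, using one of the repository's test fixtures:

```python
# Minimal sketch of what data_format.epsg drives: a PDAL LAS reader whose
# header SRS is ignored and overridden. Mirrors get_pdal_reader below.
import pdal

pipeline = pdal.Pipeline() | pdal.Reader.las(
    filename="tests/files/870000_6618000.subset.postIA.las",
    nosrs=True,                # ignore the SRS stored in the LAS header
    override_srs="EPSG:2154",  # value taken from data_format.epsg
)
pipeline.execute()
points = pipeline.arrays[0]  # named array with all LAS dimensions
```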
diff --git a/lidar_prod/application.py b/lidar_prod/application.py
index 7c3a8f78..80aa797a 100644
--- a/lidar_prod/application.py
+++ b/lidar_prod/application.py
@@ -18,7 +18,7 @@
     get_pipeline,
     request_bd_uni_for_building_shapefile,
     save_las_data_to_las,
-    BDUniConnectionParams
+    BDUniConnectionParams,
 )
 
 log = logging.getLogger(__name__)
@@ -54,7 +54,6 @@ def get_list_las_path_from_src(src_path: str):
 
 @commons.eval_time
 def identify_vegetation_unclassified(config, src_las_path: str, dest_las_path: str):
-    log.info(f"Identifying on {src_las_path}")
     data_format = config["data_format"]
     las_data = get_las_data_from_las(src_las_path)
@@ -126,10 +125,12 @@ def apply_building_module(
     cl: Cleaner = hydra.utils.instantiate(
         config.data_format.cleaning.input_building
     )
-    cl.run(src_las_path, tmp_las_path)
+    cl.run(src_las_path, tmp_las_path, config.data_format.epsg)
 
     # Validate buildings (unsure/confirmed/refuted) on a per-group basis.
-    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(config.bd_uni_connection_params)
+    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(
+        config.bd_uni_connection_params
+    )
 
     bv = BuildingValidator(
         shp_path=config.building_validation.application.shp_path,
@@ -138,7 +139,7 @@ def apply_building_module(
         bd_uni_request=config.building_validation.application.bd_uni_request,
         data_format=config.building_validation.application.data_format,
         thresholds=config.building_validation.application.thresholds,
-        use_final_classification_codes=config.building_validation.application.use_final_classification_codes
+        use_final_classification_codes=config.building_validation.application.use_final_classification_codes,
     )
     bv.run(tmp_las_path)
 
@@ -154,7 +155,7 @@ def apply_building_module(
     cl: Cleaner = hydra.utils.instantiate(
         config.data_format.cleaning.output_building
     )
-    cl.run(tmp_las_path, dest_las_path)
+    cl.run(tmp_las_path, dest_las_path, config.data_format.epsg)
 
     return dest_las_path
 
@@ -176,7 +177,11 @@ def get_shapefile(config: DictConfig, src_las_path: str, dest_las_path: str):
             os.path.splitext(os.path.basename(src_las_path))[0] + ".shp",
         ),  # new shapefile path
         get_integer_bbox(
-            get_pipeline(src_las_path),
+            get_pipeline(
+                src_las_path,
+                config.data_format.epsg,
+            ),
             buffer=config.building_validation.application.bd_uni_request.buffer,
         ),  # bbox
+        config.data_format.epsg,
     )
diff --git a/lidar_prod/tasks/building_completion.py b/lidar_prod/tasks/building_completion.py
index d9ffe673..71fef362 100644
--- a/lidar_prod/tasks/building_completion.py
+++ b/lidar_prod/tasks/building_completion.py
@@ -51,8 +51,10 @@ def run(self, input_values: Union[str, pdal.pipeline.Pipeline]):
             str: returns `target_las_path` for potential terminal piping.
 
         """
-        log.info("Completion of building with relatively distant points that have high enough probability")
-        pipeline = get_pipeline(input_values)
+        log.info(
+            "Completion of building with relatively distant points that have high enough probability"
+        )
+        pipeline = get_pipeline(input_values, self.data_format.epsg)
         self.prepare_for_building_completion(pipeline)
         self.update_classification()
diff --git a/lidar_prod/tasks/building_identification.py b/lidar_prod/tasks/building_identification.py
index 16ce39b6..1e12eee8 100644
--- a/lidar_prod/tasks/building_identification.py
+++ b/lidar_prod/tasks/building_identification.py
@@ -48,10 +48,12 @@ def run(
         _completion_flag = self.data_format.las_dimensions.completion_non_candidate_flag
 
         log.info("Clustering of points with high building proba.")
-        pipeline = get_pipeline(input_values)
+        pipeline = get_pipeline(input_values, self.data_format.epsg)
 
         # Considered for identification:
-        non_candidates = f"({self.data_format.las_dimensions.candidate_buildings_flag} == 0)"
+        non_candidates = (
+            f"({self.data_format.las_dimensions.candidate_buildings_flag} == 0)"
+        )
         not_already_confirmed = f"({self.data_format.las_dimensions.classification} != {self.data_format.codes.building.final.building})"
         not_a_potential_completion = f"({_completion_flag} != 1)"
         p_heq_threshold = f"(building>={self.min_building_proba})"
@@ -63,11 +65,17 @@ def run(
             where=where,
         )
         # Increment ClusterID, so that points from building completion can become cluster 1
-        pipeline |= pdal.Filter.assign(value=f"{_cid} = {_cid} + 1", where=f"{_cid} != 0")
-        pipeline |= pdal.Filter.assign(value=f"{_cid} = 1", where=f"{_completion_flag} == 1")
+        pipeline |= pdal.Filter.assign(
+            value=f"{_cid} = {_cid} + 1", where=f"{_cid} != 0"
+        )
+        pipeline |= pdal.Filter.assign(
+            value=f"{_cid} = 1", where=f"{_completion_flag} == 1"
+        )
         # Duplicate ClusterID to have an explicit name for it for inspection.
         # Do not reset it to zero to have access to it at human inspection stage.
-        pipeline |= pdal.Filter.ferry(dimensions=f"{_cid}=>{self.data_format.las_dimensions.ai_building_identified}")
+        pipeline |= pdal.Filter.ferry(
+            dimensions=f"{_cid}=>{self.data_format.las_dimensions.ai_building_identified}"
+        )
         if target_las_path:
             pipeline |= get_pdal_writer(target_las_path)
             os.makedirs(osp.dirname(target_las_path), exist_ok=True)
diff --git a/lidar_prod/tasks/building_validation.py b/lidar_prod/tasks/building_validation.py
index 4411f7d9..9e2a34e7 100644
--- a/lidar_prod/tasks/building_validation.py
+++ b/lidar_prod/tasks/building_validation.py
@@ -13,7 +13,6 @@
 from lidar_prod.tasks.utils import (
     get_integer_bbox,
-    get_pdal_reader,
     get_pdal_writer,
     get_pipeline,
     request_bd_uni_for_building_shapefile,
@@ -93,12 +92,12 @@ def run(
             str: returns `target_las_path`
 
         """
-        self.pipeline = get_pipeline(input_values)
+        self.pipeline = get_pipeline(input_values, self.data_format.epsg)
         with TemporaryDirectory() as td:
             log.info(
                 "Preparation : Clustering of candidates buildings & Import vectors"
             )
-            if type(input_values) == str:
+            if isinstance(input_values, str):
                 log.info(f"Applying Building Validation to file \n{input_values}")
                 temp_f = osp.join(td, osp.basename(input_values))
             else:
@@ -142,7 +141,7 @@ def prepare(
         )
         dim_overlay = self.data_format.las_dimensions.uni_db_overlay
 
-        self.pipeline = get_pipeline(input_values)
+        self.pipeline = get_pipeline(input_values, self.data_format.epsg)
         # Identify candidates buildings points with a boolean flag
         self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_candidate_flag}")
         _is_candidate_building = (
@@ -190,7 +189,7 @@ def prepare(
         _shp_p = os.path.join(temp_dirpath, "temp.shp")
         log.info("Request Bd Uni")
         buildings_in_bd_topo = request_bd_uni_for_building_shapefile(
-            self.bd_uni_connection_params, _shp_p, bbox
+            self.bd_uni_connection_params, _shp_p, bbox, self.data_format.epsg
        )
 
         # Create overlay dim
@@ -213,9 +212,7 @@ def prepare(
     def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
         """Updates point cloud classification channel."""
         if src_las_path:
-            self.pipeline = pdal.Pipeline()
-            self.pipeline |= get_pdal_reader(src_las_path)
-            self.pipeline.execute()
+            self.pipeline = get_pipeline(src_las_path, self.data_format.epsg)
 
         points = self.pipeline.arrays[0]
diff --git a/lidar_prod/tasks/building_validation_optimization.py b/lidar_prod/tasks/building_validation_optimization.py
index 2d959d03..f5185c5f 100644
--- a/lidar_prod/tasks/building_validation_optimization.py
+++ b/lidar_prod/tasks/building_validation_optimization.py
@@ -10,7 +10,6 @@
 
 import numpy as np
 import optuna
-import pdal
 from sklearn.metrics import confusion_matrix
 from tqdm import tqdm
 
@@ -19,7 +18,10 @@
     BuildingValidator,
     thresholds,
 )
-from lidar_prod.tasks.utils import get_pdal_reader, split_idx_by_dim
+from lidar_prod.tasks.utils import (
+    split_idx_by_dim,
+    pdal_read_las_array,
+)
 
 log = logging.getLogger(__name__)
 
@@ -227,10 +229,7 @@ def _extract_clusters_from_las(self, prepared_las_path: str) -> List[BuildingVal
             List[BuildingValidationClusterInfo]: cluster information for each cluster of candidate buildings
 
         """
-        pipeline = pdal.Pipeline()
-        pipeline |= get_pdal_reader(prepared_las_path)
-        pipeline.execute()
-        las = pipeline.arrays[0]
+        las = pdal_read_las_array(prepared_las_path, self.bv.data_format.epsg)
         # las: laspy.LasData = laspy.read(prepared_las_path)
         dim_cluster_id = las[self.bv.data_format.las_dimensions.ClusterID_candidate_building]
         dim_classification = las[self.bv.data_format.las_dimensions.classification]
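The optimizer above keeps `split_idx_by_dim` from utils to group candidate points by their `ClusterID`. A behaviour sketch, inferred from its use here and from the tests in `tests/lidar_prod/tasks/test_utils.py` (the sample values are illustrative, not from the repository):

```python
# Behaviour sketch of split_idx_by_dim, inferred from its use on ClusterID
# arrays: one index array per distinct value, in sorted value order.
import numpy as np
from lidar_prod.tasks.utils import split_idx_by_dim

cluster_ids = np.array([1, 2, 1, 2])
group_idx = split_idx_by_dim(cluster_ids)
assert np.array_equal(group_idx[0], np.array([0, 2]))  # points of cluster 1
assert np.array_equal(group_idx[1], np.array([1, 3]))  # points of cluster 2
```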
diff --git a/lidar_prod/tasks/cleaning.py b/lidar_prod/tasks/cleaning.py
index 097b77aa..4b34e273 100644
--- a/lidar_prod/tasks/cleaning.py
+++ b/lidar_prod/tasks/cleaning.py
@@ -44,14 +44,17 @@ def get_extra_dims_as_str(self):
         return_str = ",".join([f"{k}={v}" for k, v in self.extra_dims_as_dict.items()])
         return return_str if return_str else []
 
-    def run(self, src_las_path: str, target_las_path: str):
+    def run(self, src_las_path: str, target_las_path: str, epsg: int | str):
         """Clean out LAS extra dimensions.
 
         Args:
             src_las_path (str): input LAS path
             target_las_path (str): output LAS path, with specified extra dims.
+            epsg (int | str): epsg code for the input file (if empty or None: infer
+                it from the las metadata)
+
         """
-        points = pdal_read_las_array(src_las_path)
+        points = pdal_read_las_array(src_las_path, epsg)
         # Check input dims to see what we can keep.
         input_dims = points.dtype.fields.keys()
         self.extra_dims_as_dict = {k: v for k, v in self.extra_dims_as_dict.items() if k in input_dims}
diff --git a/lidar_prod/tasks/utils.py b/lidar_prod/tasks/utils.py
index edf9167d..d7ea8257 100644
--- a/lidar_prod/tasks/utils.py
+++ b/lidar_prod/tasks/utils.py
@@ -3,12 +3,13 @@
 import subprocess
 from dataclasses import dataclass
 from numbers import Number
-from typing import Any, Dict, Iterable, Union
+from typing import Any, Dict, Iterable
 
 import geopandas
 import laspy
 import numpy as np
 import pdal
+import psycopg2
 
 log = logging.getLogger(__name__)
 
@@ -34,25 +35,41 @@ def split_idx_by_dim(dim_array):
     return group_idx
 
 
-def get_pipeline(input_value: Union[pdal.pipeline.Pipeline, str]):
-    """If the input value is a pipeline, returns it; if it's a las path, returns the corresponding pipeline"""
-    if type(input_value) == str:
-        pipeline = pdal.Pipeline() | get_pdal_reader(input_value)
+def get_pipeline(input_value: pdal.pipeline.Pipeline | str, epsg: int | str):
+    """If the input value is a pipeline, returns it; if it's a las path, returns the corresponding pipeline.
+
+    Args:
+        input_value (pdal.pipeline.Pipeline | str): input value to get a pipeline from
+            (las pipeline or path to a file to read with pdal)
+        epsg (int | str): if input_value is a string, use the epsg value to override the crs from the las header
+
+    Returns:
+        pdal pipeline
+    """
+    if isinstance(input_value, str):
+        pipeline = pdal.Pipeline() | get_pdal_reader(input_value, epsg)
         pipeline.execute()
     else:
         pipeline = input_value
     return pipeline
 
 
-def get_las_metadata(entry_value: Union[pdal.pipeline.Pipeline, str]):
-    pipeline = get_pipeline(entry_value)
+def get_input_las_metadata(pipeline: pdal.pipeline.Pipeline):
+    """Get las reader metadata from the input pipeline"""
     return pipeline.metadata["metadata"]["readers.las"]
 
 
-def get_integer_bbox(entry_value: Union[pdal.pipeline.Pipeline, str], buffer: Number = 0) -> Dict[str, int]:
-    pipeline = get_pipeline(entry_value)
-    """Get XY bounding box of a cloud, cast x/y min/max to integers."""
-    metadata = get_las_metadata(pipeline)
+def get_integer_bbox(pipeline: pdal.pipeline.Pipeline, buffer: Number = 0) -> Dict[str, int]:
+    """Get XY bounding box of the las input of a pipeline, cast x/y min/max to integers.
+
+    Args:
+        pipeline (pdal.pipeline.Pipeline): pipeline for which to read the input bounding box
+        buffer (Number, optional): buffer to add to the bounds before casting it to integers. Defaults to 0.
+
+    Returns:
+        Dict[str, int]: x/y min/max values as a dictionary
+    """
+    metadata = get_input_las_metadata(pipeline)
     bbox = {
         "x_min": math.floor(metadata["minx"] - buffer),
         "y_min": math.floor(metadata["miny"] - buffer),
@@ -62,21 +79,30 @@ def get_integer_bbox(entry_value: Union[pdal.pipeline.Pipeline, str], buffer: Nu
     return bbox
 
 
-def get_pdal_reader(las_path: str) -> pdal.Reader.las:
+def get_pdal_reader(las_path: str, epsg: int | str) -> pdal.Reader.las:
     """Standard Reader which imposes Lambert 93 SRS.
 
     Args:
         las_path (str): input LAS path to read.
+        epsg (int | str): epsg code for the input file (if empty or None: infer
+            it from the las metadata)
 
     Returns:
         pdal.Reader.las: reader to use in a pipeline.
 
     """
-    return pdal.Reader.las(
-        filename=las_path,
-        nosrs=True,
-        override_srs="EPSG:2154",
-    )
+    if epsg:
+        reader = pdal.Reader.las(
+            filename=las_path,
+            nosrs=True,
+            override_srs=(f"EPSG:{epsg}" if (isinstance(epsg, int) or epsg.isdigit()) else epsg),
+        )
+    else:
+        reader = pdal.Reader.las(
+            filename=las_path,
+        )
+
+    return reader
 
 
 def get_las_data_from_las(las_path: str) -> laspy.lasdata.LasData:
@@ -109,93 +135,155 @@ def save_las_data_to_las(las_path: str, las_data: laspy.lasdata.LasData):
     las_data.write(las_path)
 
 
-def get_a_las_to_las_pdal_pipeline(src_las_path: str, target_las_path: str, ops: Iterable[Any]):
+def get_a_las_to_las_pdal_pipeline(
+    src_las_path: str, target_las_path: str, ops: Iterable[Any], epsg: int | str
+):
     """Create a pdal pipeline, preserving format, forwarding every dimension.
 
     Args:
         src_las_path (str): input LAS path
         target_las_path (str): output LAS path
         ops (Iterable[Any]): list of pdal operation (e.g. Filter.assign(...))
+        epsg (int | str): epsg code for the input file (if empty or None: infer it from the las metadata)
 
     """
     pipeline = pdal.Pipeline()
-    pipeline |= get_pdal_reader(src_las_path)
+    pipeline |= get_pdal_reader(src_las_path, epsg)
     for op in ops:
         pipeline |= op
     pipeline |= get_pdal_writer(target_las_path)
     return pipeline
 
 
-def pdal_read_las_array(las_path: str):
+def pdal_read_las_array(las_path: str, epsg: int | str):
     """Read LAS as a named array.
 
     Args:
         las_path (str): input LAS path
+        epsg (int | str): epsg code for the input file (if empty or None: infer it from the las metadata)
 
     Returns:
         np.ndarray: named array with all LAS dimensions, including extra ones, with dict-like access.
 
     """
-    p1 = pdal.Pipeline() | get_pdal_reader(las_path)
+    p1 = pdal.Pipeline() | get_pdal_reader(las_path, epsg)
     p1.execute()
     return p1.arrays[0]
 
 
+def check_bbox_intersects_territoire_with_srid(
+    bd_params: BDUniConnectionParams, bbox: Dict[str, int], epsg_srid: int | str
+):
+    """Check if a bounding box intersects one of the territories from the BDUni database
+    (public.gcms_territoire) with the expected srid.
+
+    As geometries are indicated with srid = 0 in the database (but stored in their original
+    projection), both geometries are compared using this common srid.
+    In the territoire geometry query, ST_Union is used to combine different territoires that
+    would have the same srid (e.g. 5490 for Guadeloupe and Martinique).
+    """
+    conn = psycopg2.connect(
+        dbname=bd_params.bd_name,
+        user=bd_params.user,
+        password=bd_params.pwd,
+        host=bd_params.host,
+    )
+    try:
+        with conn:
+            with conn.cursor() as curs:
+                query = f"""select ST_Intersects(
+                    ST_MakeEnvelope({bbox["x_min"]}, {bbox["y_min"]}, {bbox["x_max"]}, {bbox["y_max"]}, 0),
+                    ST_SetSRID(ST_Envelope(ST_Union(ST_Force2D(geometrie))),0))::bool as consistency_bbox_srid
+                    from public.gcms_territoire
+                    where srid = '{epsg_srid}'
+                    limit 1;
+                    """
+                curs.execute(query)
+                out = curs.fetchone()
+
+    # Unlike file objects or other resources, exiting the connection's with block doesn't close
+    # the connection, hence we need to close it manually.
+    # cf https://www.psycopg.org/docs/usage.html#with-statement
+    finally:
+        conn.close()
+
+    return out[0]
+
+
 def request_bd_uni_for_building_shapefile(
     bd_params: BDUniConnectionParams,
     shapefile_path: str,
     bbox: Dict[str, int],
+    epsg: int | str,
 ):
-    """Rrequest BD Uni for its buildings.
+    """Request BD Uni for its buildings.
 
-    Create a shapefile with non destructed building on the area of interest
-    and saves it.
+    Create a shapefile with the non-destroyed buildings on the area of interest and save it.
     Also add a "PRESENCE" column filled with 1 for later use by pdal.
 
+    Note on the projections:
+    Projections are mixed in the BDUni tables.
+    In PostGIS, the declared projection is 0 but the data are stored in the legal projection of the
+    corresponding territories.
+    In each table, there is a "gcms_territoire" field, which tells the corresponding territory
+    (3 letters code).
+    The gcms_territoire table gives hints on each territory (SRID, footprint).
     """
-    Lambert_93_SRID = 2154
+
+    epsg_srid = epsg if (isinstance(epsg, int) or epsg.isdigit()) else epsg.split(":")[-1]
+
+    if not check_bbox_intersects_territoire_with_srid(bd_params, bbox, epsg_srid):
+        raise ValueError(
+            f"The query bbox ({bbox}) does not intersect with any territoire in the database with "
+            + f"the query srid ({epsg_srid}). Please check that you passed the correct srid."
+        )
+
+    sql_territoire = f"""WITH territoire(code) as (SELECT code FROM public.gcms_territoire WHERE srid = {epsg_srid}) """
+
     sql_batiment = f"""SELECT \
-        st_setsrid(batiment.geometrie,{Lambert_93_SRID}) AS geometry, \
+        ST_MakeValid(ST_Force2D(st_setsrid(batiment.geometrie,{epsg_srid}))) AS geometry, \
         1 as presence \
-        FROM batiment \
-        WHERE batiment.geometrie \
-        && ST_MakeEnvelope({bbox["x_min"]}, {bbox["y_min"]}, {bbox["x_max"]}, {bbox["y_max"]}, {Lambert_93_SRID}) \
+        FROM batiment, territoire \
+        WHERE (batiment.gcms_territoire = territoire.code) \
+        AND batiment.geometrie \
+        && ST_MakeEnvelope({bbox["x_min"]}, {bbox["y_min"]}, {bbox["x_max"]}, {bbox["y_max"]}, 0) \
         AND not gcms_detruit"""
 
     sql_reservoir = f"""SELECT \
-        st_setsrid(reservoir.geometrie,{Lambert_93_SRID}) AS geometry, \
+        ST_MakeValid(ST_Force2D(st_setsrid(reservoir.geometrie,{epsg_srid}))) AS geometry, \
         1 as presence \
-        FROM reservoir \
-        WHERE reservoir.geometrie \
-        && ST_MakeEnvelope({bbox["x_min"]}, {bbox["y_min"]}, {bbox["x_max"]}, {bbox["y_max"]}, {Lambert_93_SRID}) \
+        FROM reservoir, territoire \
+        WHERE (reservoir.gcms_territoire = territoire.code) \
+        AND reservoir.geometrie \
+        && ST_MakeEnvelope({bbox["x_min"]}, {bbox["y_min"]}, {bbox["x_max"]}, {bbox["y_max"]}, 0) \
         AND (reservoir.nature = 'Château d''eau' OR reservoir.nature = 'Réservoir industriel') \
         AND NOT gcms_detruit"""
 
     sql_select_list = [sql_batiment, sql_reservoir]
-    sql_request = " UNION ".join(sql_select_list)
-
-    cmd = [
-        "pgsql2shp",
-        "-f",
-        shapefile_path,
-        "-h",
-        bd_params.host,
-        "-u",
-        bd_params.user,
-        "-P",
-        bd_params.pwd,
-        bd_params.bd_name,
-        sql_request,
-    ]
+    sql_request = sql_territoire + " UNION ".join(sql_select_list)
+
+    cmd = f"""pgsql2shp -f {shapefile_path} \
+        -h {bd_params.host} \
+        -u {bd_params.user} \
+        -P {bd_params.pwd} \
+        {bd_params.bd_name} \
+        \"{sql_request}\""""
 
+    # This call may fail, e.g. on empty tables or timeouts; both cases are handled below.
     try:
-        subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=120)
+        subprocess.check_output(
+            cmd, shell=True, stderr=subprocess.STDOUT, timeout=120, encoding="utf-8"
+        )
     except subprocess.CalledProcessError as e:
         # In empty zones, pgsql2shp does not create a shapefile
-        if b"Initializing... \nERROR: Could not determine table metadata (empty table)\n" in e.output:
-
+        if (
+            "Initializing... \nERROR: Could not determine table metadata (empty table)\n"
+            in e.output
+        ):
             # write empty shapefile
-            df = geopandas.GeoDataFrame(columns=["id", "geometry"], geometry="geometry", crs=f"EPSG:{Lambert_93_SRID}")
+            df = geopandas.GeoDataFrame(
+                columns=["id", "geometry"],
+                geometry="geometry",
+                crs=f"EPSG:{epsg_srid}",
+            )
             df.to_file(shapefile_path)
 
             return False
@@ -214,9 +302,4 @@ def request_bd_uni_for_building_shapefile(
         log.error("Time out when requesting BDUni.")
         raise e
 
-    # read & write to avoid unnacepted 3D shapefile format.
-    # Dissolve to avoid invalid shapefile that would make pdal hang in overlay filter.
-    gdf = geopandas.read_file(shapefile_path)
-    gdf[["PRESENCE", "geometry"]].dissolve().to_file(shapefile_path)
-
     return True
diff --git a/package_metadata.yaml b/package_metadata.yaml
index 0962dc8e..10363b20 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "V1.9.14"
+__version__: "V1.10.0"
 __name__: "lidar_prod"
 __url__: "https://github.com/IGNF/lidar-prod-quality-control"
 __description__: "A 3D semantic segmentation production tool to augment rule-based Lidar classification with AI and databases."
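Both `get_pdal_reader` and `request_bd_uni_for_building_shapefile` accept the EPSG as an int, a bare digit string, or a full `"EPSG:xxxx"`-style string. A worked sketch of the normalization they inline (the helper name is hypothetical; in the diff this logic is a one-line conditional):

```python
# Worked sketch of the epsg normalization inlined above.
# The helper name is hypothetical, introduced only for illustration.
def epsg_to_srid(epsg):
    if isinstance(epsg, int) or epsg.isdigit():
        return str(epsg)        # 2154 or "2154"  -> "2154"
    return epsg.split(":")[-1]  # "EPSG:5490"     -> "5490"

assert epsg_to_srid(2154) == "2154"
assert epsg_to_srid("5490") == "5490"
assert epsg_to_srid("EPSG:5490") == "5490"
```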
diff --git a/setup_env/requirements.yml b/setup_env/requirements.yml
index e5b6d57a..8d9a9883 100755
--- a/setup_env/requirements.yml
+++ b/setup_env/requirements.yml
@@ -2,7 +2,7 @@ name: lidar_prod
 channels:
   - conda-forge
 dependencies:
-  - python==3.9.*
+  - python==3.10.*
   - pip
   # --------- linters --------- #
   - pre-commit # hooks for applying linters on commit
@@ -15,11 +15,13 @@ dependencies:
   - numpy
   - scikit-learn
   - geopandas
+  - pyproj
   # --------- others --------- #
+  - psycopg2 # database interaction
   - jupyterlab # better jupyter notebooks
   - pudb # debugger
-  - rich==11.2.* # rich text formatting
-  - pytest==6.2.* # tests
+  - rich==11.2.* # rich text formatting
+  - pytest>=7.1.2 # tests
   - pytest-cov==3.0.*
   # --------- torch --------- #
   - pip:
diff --git a/tests/conftest.py b/tests/conftest.py
index 69581aeb..ba580ed8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,11 +24,11 @@ def hydra_cfg():
     return compose(config_name="config", overrides=["data_format=default.yaml", "building_validation/optimization=pytest.yaml"])
 
 
-def check_las_invariance(las_path1, las_path2):
+def check_las_invariance(las_path1, las_path2, epsg):
     TOLERANCE = 0.0001
 
-    array1 = pdal_read_las_array(las_path1)
-    array2 = pdal_read_las_array(las_path2)
+    array1 = pdal_read_las_array(las_path1, epsg)
+    array2 = pdal_read_las_array(las_path2, epsg)
     key_dims = ["X", "Y", "Z", "Infrared", "Red", "Blue", "Green", "Intensity"]
     assert array1.shape == array2.shape  # no loss of points
     assert all(dim in array2.dtype.fields.keys() for dim in key_dims)  # key dimensions are here
@@ -41,7 +41,7 @@ def check_las_invariance(las_path1, las_path2, epsg):
         assert pytest.approx(np.sum(array2[dim]), TOLERANCE) == np.sum(array1[dim])
 
 
-def check_las_contains_dims(las1, dims_to_check=[]):
-    a1 = pdal_read_las_array(las1)
+def check_las_contains_dims(las1, epsg, dims_to_check=[]):
+    a1 = pdal_read_las_array(las1, epsg)
     for d in dims_to_check:
         assert d in a1.dtype.fields.keys()
diff --git a/tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz b/tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz
new file mode 100644
index 00000000..4718b03b
Binary files /dev/null and b/tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz differ
diff --git a/tests/lidar_prod/tasks/test_building_validation_preparation.py b/tests/lidar_prod/tasks/test_building_validation_preparation.py
index f82595f5..d45a031a 100644
--- a/tests/lidar_prod/tasks/test_building_validation_preparation.py
+++ b/tests/lidar_prod/tasks/test_building_validation_preparation.py
@@ -1,20 +1,94 @@
-import os
-import tempfile
+import shutil
+from pathlib import Path
 
 import hydra
+import numpy as np
+import pytest
 
 from lidar_prod.tasks.building_validation import BuildingValidator
-from lidar_prod.tasks.utils import BDUniConnectionParams
+from lidar_prod.tasks.utils import BDUniConnectionParams, get_las_data_from_las
 
-INVALID_OVERLAY_LAZ_PATH = "tests/files/invalid_overlay_data/842_6521-invalid_shapefile_area-borders.las"
+TMP_DIR = Path("tmp/lidar_prod/tasks/building_validation_preparation")
+
+
+def setup_module(module):
+    try:
+        shutil.rmtree(TMP_DIR)
+    except FileNotFoundError:
+        pass
+    TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def test_shapefile_overlay_in_building_module(hydra_cfg):
+    """Check that the prepare function adds presence data when the laz geometry intersects the
+    BDUni territoire corresponding to the configured epsg"""
+    # Run application on a St Barthelemy data subset with its correct epsg (5490).
+    laz_input_file = "tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz"
+    epsg = 5490
+
+    target_las_path = str(TMP_DIR / "St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m_prepared.laz")
+    cfg = hydra_cfg.copy()
+    cfg.data_format.epsg = epsg
+
+    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(
+        cfg.bd_uni_connection_params
+    )
+
+    bv = BuildingValidator(
+        shp_path=cfg.building_validation.application.shp_path,
+        bd_uni_connection_params=bd_uni_connection_params,
+        cluster=cfg.building_validation.application.cluster,
+        bd_uni_request=cfg.building_validation.application.bd_uni_request,
+        data_format=cfg.building_validation.application.data_format,
+        thresholds=cfg.building_validation.application.thresholds,
+        use_final_classification_codes=cfg.building_validation.application.use_final_classification_codes,
+    )
+
+    bv.prepare(laz_input_file, target_las_path, save_result=True)
+    data = get_las_data_from_las(target_las_path)
+    overlay = data[cfg.data_format.las_dimensions.uni_db_overlay]
+    assert np.any(overlay == 1)  # assert some points are marked
+    assert np.any(overlay == 0)  # assert not all points are marked
+
+
+def test_shapefile_overlay_in_building_module_fail(hydra_cfg):
+    """Check that the prepare function fails if the laz geometry does not intersect the
+    BDUni territoire corresponding to the configured epsg"""
+    # Run application on the same data subset with a mismatched epsg (2154).
+    laz_input_file = "tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz"
+    wrong_epsg = 2154
+
+    target_las_path = str(
+        TMP_DIR / "St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m_wrong_epsg.laz"
+    )
+    cfg = hydra_cfg.copy()
+    cfg.data_format.epsg = wrong_epsg
+
+    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(
+        cfg.bd_uni_connection_params
+    )
+
+    bv = BuildingValidator(
+        shp_path=cfg.building_validation.application.shp_path,
+        bd_uni_connection_params=bd_uni_connection_params,
+        cluster=cfg.building_validation.application.cluster,
+        bd_uni_request=cfg.building_validation.application.bd_uni_request,
+        data_format=cfg.building_validation.application.data_format,
+        thresholds=cfg.building_validation.application.thresholds,
+        use_final_classification_codes=cfg.building_validation.application.use_final_classification_codes,
+    )
+
+    with pytest.raises(ValueError):
+        bv.prepare(laz_input_file, target_las_path, save_result=True)
 
 
 # We try to reduce size of LAZ to isolate the problem first to make it quick to test when it is ok.
+# Normal execution on subset of LAZ lasts ~ 3sec.
 # If a regression occurs, the pdal execution will hang and a timeout would make it more apparent.
 # However, pytest-timeout does not stop pdal for some reasons. For now this should be sufficient.
-def test_shapefile_overlay_in_building_module(hydra_cfg):
+def test_shapefile_overlay_in_building_module_invalid_overlay(hydra_cfg):
     """We test the application against a LAS subset for which the BDUni shapefile
     shows overlapping vectors.
 
     We only need points at the borders of the area in order to request the
     error-generating shapefile.
@@ -23,23 +97,24 @@
     shapefile to remove this bug.
 
     """
+    invalid_overlay_laz_path = (
+        "tests/files/invalid_overlay_data/842_6521-invalid_shapefile_area-borders.las"
+    )
     # Run application on the data subset for which vector data is expected to be invalid.
-    with tempfile.TemporaryDirectory() as hydra_cfg.paths.output_dir:
-        target_las_path = os.path.join(
-            hydra_cfg.paths.output_dir,
-            os.path.basename(INVALID_OVERLAY_LAZ_PATH),
-        )
-
-        bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(hydra_cfg.bd_uni_connection_params)
-
-        bv = BuildingValidator(
-            shp_path=hydra_cfg.building_validation.application.shp_path,
-            bd_uni_connection_params=bd_uni_connection_params,
-            cluster=hydra_cfg.building_validation.application.cluster,
-            bd_uni_request=hydra_cfg.building_validation.application.bd_uni_request,
-            data_format=hydra_cfg.building_validation.application.data_format,
-            thresholds=hydra_cfg.building_validation.application.thresholds,
-            use_final_classification_codes=hydra_cfg.building_validation.application.use_final_classification_codes
-        )
-
-        bv.prepare(INVALID_OVERLAY_LAZ_PATH, target_las_path)
+    target_las_path = str(TMP_DIR / "invalid_overlay.laz")
+
+    bd_uni_connection_params: BDUniConnectionParams = hydra.utils.instantiate(
+        hydra_cfg.bd_uni_connection_params
+    )
+
+    bv = BuildingValidator(
+        shp_path=hydra_cfg.building_validation.application.shp_path,
+        bd_uni_connection_params=bd_uni_connection_params,
+        cluster=hydra_cfg.building_validation.application.cluster,
+        bd_uni_request=hydra_cfg.building_validation.application.bd_uni_request,
+        data_format=hydra_cfg.building_validation.application.data_format,
+        thresholds=hydra_cfg.building_validation.application.thresholds,
+        use_final_classification_codes=hydra_cfg.building_validation.application.use_final_classification_codes,
+    )
+
+    bv.prepare(invalid_overlay_laz_path, target_las_path)
diff --git a/tests/lidar_prod/tasks/test_cleaning.py b/tests/lidar_prod/tasks/test_cleaning.py
index 16ba6f41..2dcee810 100644
--- a/tests/lidar_prod/tasks/test_cleaning.py
+++ b/tests/lidar_prod/tasks/test_cleaning.py
@@ -9,6 +9,7 @@
 from tests.lidar_prod.test_application import check_las_format_versions_and_srs
 
 SRC_LAS_SUBSET_PATH = "tests/files/870000_6618000.subset.postIA.las"
+SRC_LAS_EPSG = "2154"
 LAS_SUBSET_FILE_VEGETATION = "tests/files/436000_6478000.subset.postIA.las"
 
@@ -18,9 +19,9 @@ def test_cleaning_no_extra_dims(extra_dims):
     with tempfile.TemporaryDirectory() as td:
         clean_las_path = osp.join(td, "no_extra_dims.las")
-        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path)
-        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path)
-        a = pdal_read_las_array(clean_las_path)
+        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         las_dimensions = a.dtype.fields.keys()
         # Check that key dims were cleaned out
         assert all(dim not in las_dimensions for dim in ["building", "entropy"])
@@ -30,9 +31,9 @@ def test_cleaning_float_extra_dim():
     cl = Cleaner(extra_dims="entropy=float")
     with tempfile.TemporaryDirectory() as td:
         clean_las_path = osp.join(td, "float_extra_dim.las")
-        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path)
-        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path)
-        a = pdal_read_las_array(clean_las_path)
+        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         las_dimensions = a.dtype.fields.keys()
         assert "entropy" in las_dimensions
         assert "building" not in las_dimensions
@@ -46,9 +47,9 @@ def test_cleaning_two_float_extra_dims_and_one_fantasy_dim():
     cl = Cleaner(extra_dims=extra_dims)
     with tempfile.TemporaryDirectory() as td:
         clean_las_path = osp.join(td, "float_extra_dim.las")
-        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path)
-        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path)
-        out_a = pdal_read_las_array(clean_las_path)
+        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        check_las_invariance(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        out_a = pdal_read_las_array(clean_las_path, SRC_LAS_EPSG)
         assert d1 in out_a.dtype.fields.keys()
         assert d2 in out_a.dtype.fields.keys()
         assert d3 not in out_a.dtype.fields.keys()
@@ -59,8 +60,8 @@ def test_pdal_cleaning_format(extra_dims):
     cl = Cleaner(extra_dims=extra_dims)
     with tempfile.TemporaryDirectory() as td:
         clean_las_path = osp.join(td, "float_extra_dim.las")
-        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path)
-        check_las_format_versions_and_srs(clean_las_path)
+        cl.run(SRC_LAS_SUBSET_PATH, clean_las_path, SRC_LAS_EPSG)
+        check_las_format_versions_and_srs(clean_las_path, SRC_LAS_EPSG)
 
 
 @pytest.mark.parametrize(
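As the tests above show, `Cleaner.run` now requires the EPSG argument. A hedged usage sketch outside pytest (the output path is illustrative; the input is a repository test fixture):

```python
# Usage sketch of the updated Cleaner API with the epsg argument.
from lidar_prod.tasks.cleaning import Cleaner

cl = Cleaner(extra_dims="entropy=float")
cl.run(
    "tests/files/870000_6618000.subset.postIA.las",  # src_las_path
    "/tmp/cleaned.las",                              # target_las_path (illustrative)
    "2154",                                          # epsg (int or digit string both work)
)
```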
""" - bounds = ( - f'([{bbox["x_min"]},{bbox["x_max"]}],[{bbox["y_min"]},{bbox["y_max"]}],[0,100])' - ) + bounds = f'([{bbox["x_min"]},{bbox["x_max"]}],[{bbox["y_min"]},{bbox["y_max"]}],[0,100])' pipeline = pdal.Pipeline() - pipeline |= pdal.Reader.faux( - filename="no_file.las", mode="ramp", count=100, bounds=bounds - ) + pipeline |= pdal.Reader.faux(filename="no_file.las", mode="ramp", count=100, bounds=bounds) pipeline |= get_pdal_writer(synthetic_las_path) pipeline.execute() @@ -55,3 +71,78 @@ def test_split_idx_by_dim_unordered(): for i, group in enumerate(group_idx): assert np.array_equal(group, expected_groups[i]) assert np.array_equal(dim_array[group, 1], expected_values[i]) + + +@pytest.mark.parametrize( + "bbox,srid,expected_result", + [ + ( + # Bbox in Metropolitan France, with correct epsg => output should not be true + dict(x_min=870150, y_min=6616950, x_max=870350, y_max=6617200), + 2154, + True, + ), + ( + # Bbox in St Barthelemy with correct epsg => output should be true + # srid 5490 corresponds to 2 different territories, which should not impact the result + dict(x_min=515000, y_min=1981000, x_max=515100, y_max=1981100), + 5490, + True, + ), + ( + # Bbox in St Barthelemy with wrong epsg => output should be false + dict(x_min=515000, y_min=1981000, x_max=515100, y_max=1981100), + 2154, + False, + ), + ], +) +def test_check_bbox_intersects_territoire_with_srid(hydra_cfg, bbox, srid, expected_result): + + res = check_bbox_intersects_territoire_with_srid( + hydra_cfg.bd_uni_connection_params, + bbox=bbox, + epsg_srid=srid, + ) + assert res == expected_result + + +@pytest.mark.parametrize( + "bbox,epsg,out_shp", + [ + ( + # Bbox in Metropolitan France, with correct epsg => output should not be empty + dict(x_min=870150, y_min=6616950, x_max=870350, y_max=6617200), + 2154, + "metropolitan_ok.shp", + ), + ( + # Bbox in St Barthelemy with correct epsg => output should not be empty + dict(x_min=515000, y_min=1981000, x_max=515100, y_max=1981100), + 5490, + "st_barth_ok.shp", + ), + ], +) +def test_request_bd_uni_for_building_shapefile(hydra_cfg, bbox, epsg, out_shp): + out_path = TMP_DIR / out_shp + request_bd_uni_for_building_shapefile( + hydra_cfg.bd_uni_connection_params, + shapefile_path=out_path, + bbox=bbox, + epsg=epsg, + ) + assert out_path.is_file() + gdf = gdb.read_file(out_path) + assert bool(len(gdf.index)) + + +def test_request_bd_uni_for_building_shapefile_fail(hydra_cfg): + with pytest.raises(ValueError): + out_path = (TMP_DIR / "st_barth_nok.shp",) + request_bd_uni_for_building_shapefile( + hydra_cfg.bd_uni_connection_params, + shapefile_path=out_path, + bbox=dict(x_min=515000, y_min=1981000, x_max=515100, y_max=1981100), + epsg=2154, + ) diff --git a/tests/lidar_prod/test_application.py b/tests/lidar_prod/test_application.py index 5197e365..fabe2633 100644 --- a/tests/lidar_prod/test_application.py +++ b/tests/lidar_prod/test_application.py @@ -1,14 +1,31 @@ import os import tempfile +import geopandas import numpy as np import pdal +import pyproj import pytest from omegaconf import open_dict -from lidar_prod.application import apply, apply_building_module, get_shapefile, identify_vegetation_unclassified, just_clean -from lidar_prod.tasks.utils import get_a_las_to_las_pdal_pipeline, get_las_data_from_las, get_las_metadata -from tests.conftest import check_las_contains_dims, check_las_invariance, pdal_read_las_array +from lidar_prod.application import ( + apply, + apply_building_module, + get_shapefile, + identify_vegetation_unclassified, + just_clean, +) +from 
diff --git a/tests/lidar_prod/test_application.py b/tests/lidar_prod/test_application.py
index 5197e365..fabe2633 100644
--- a/tests/lidar_prod/test_application.py
+++ b/tests/lidar_prod/test_application.py
@@ -1,14 +1,31 @@
 import os
 import tempfile
 
+import geopandas
 import numpy as np
 import pdal
+import pyproj
 import pytest
 from omegaconf import open_dict
 
-from lidar_prod.application import apply, apply_building_module, get_shapefile, identify_vegetation_unclassified, just_clean
-from lidar_prod.tasks.utils import get_a_las_to_las_pdal_pipeline, get_las_data_from_las, get_las_metadata
-from tests.conftest import check_las_contains_dims, check_las_invariance, pdal_read_las_array
+from lidar_prod.application import (
+    apply,
+    apply_building_module,
+    get_shapefile,
+    identify_vegetation_unclassified,
+    just_clean,
+)
+from lidar_prod.tasks.utils import (
+    get_a_las_to_las_pdal_pipeline,
+    get_las_data_from_las,
+    get_input_las_metadata,
+    get_pipeline,
+)
+from tests.conftest import (
+    check_las_contains_dims,
+    check_las_invariance,
+    pdal_read_las_array,
+)
 
 LAS_SUBSET_FILE_BUILDING = "tests/files/870000_6618000.subset.postIA.las"
 SHAPE_FILE = "tests/files/870000_6618000.subset.postIA.shp"
@@ -16,6 +33,9 @@
 LAZ_SUBSET_FILE_VEGETATION = "tests/files/436000_6478000.subset.postIA.laz"
 DUMMY_DIRECTORY_PATH = "tests/files/dummy_folder"
 DUMMY_FILE_PATH = "tests/files/dummy_folder/dummy_file1.las"
+LAS_FILE_BUILDING_5490 = (
+    "tests/files/St_Barth_RGAF09_UTM20N_IGN_1988_SB_subset_100m.laz"
+)
 
 
 @pytest.mark.parametrize(
@@ -59,44 +79,62 @@ def test_application_data_invariance_and_data_format(hydra_cfg, las_mutation, qu
     with tempfile.TemporaryDirectory() as hydra_cfg.paths.output_dir:
         # Copy the data and apply the "mutation"
         mutated_copy: str = tempfile.NamedTemporaryFile().name
-        pipeline = get_a_las_to_las_pdal_pipeline(LAS_SUBSET_FILE_BUILDING, mutated_copy, las_mutation)
+        pipeline = get_a_las_to_las_pdal_pipeline(
+            LAS_SUBSET_FILE_BUILDING,
+            mutated_copy,
+            las_mutation,
+            hydra_cfg.data_format.epsg,
+        )
         pipeline.execute()
         hydra_cfg.paths.src_las = mutated_copy
         if not query_db_Uni:  # we don't request db_uni, we use a shapefile instead
             hydra_cfg.building_validation.application.shp_path = SHAPE_FILE
         updated_las_path_list = apply(hydra_cfg, apply_building_module)
         # Check output
-        check_las_invariance(mutated_copy, updated_las_path_list[0])
-        check_format_of_application_output_las(updated_las_path_list[0], expected_codes)
+        check_las_invariance(
+            mutated_copy, updated_las_path_list[0], hydra_cfg.data_format.epsg
+        )
+        check_format_of_application_output_las(
+            updated_las_path_list[0], hydra_cfg.data_format.epsg, expected_codes
+        )
 
 
-def check_format_of_application_output_las(output_las_path: str, expected_codes: dict):
+def check_format_of_application_output_las(
+    output_las_path: str, epsg: int | str, expected_codes: dict
+):
     """Check LAS format, dimensions, and classification codes of output
 
     Args:
         output_las_path (str): path of output LAS
+        epsg (int | str): epsg code for the file (if empty or None: infer
+            it from the las metadata). Used to read the data.
        expected_codes (dict): set of expected classification codes.
 
     """
     # Check that we contain extra_dims that production needs
-    check_las_contains_dims(output_las_path, dims_to_check=["Group", "entropy"])
+    check_las_contains_dims(output_las_path, epsg, dims_to_check=["Group", "entropy"])
     # Ensure that the format versions are as expected
-    check_las_format_versions_and_srs(output_las_path)
+    check_las_format_versions_and_srs(output_las_path, epsg)
     # Check that we have either 1/2 (ground/unclassified),
     # or one of the three final classification code of the module
-    arr1 = pdal_read_las_array(output_las_path)
+    arr1 = pdal_read_las_array(output_las_path, epsg)
     actual_codes = {*np.unique(arr1["Classification"])}
     assert actual_codes.issubset(expected_codes)
 
 
-def check_las_format_versions_and_srs(pipeline: pdal.pipeline.Pipeline):
-    metadata = get_las_metadata(pipeline)
+def check_las_format_versions_and_srs(input_path: str, epsg: int | str):
+    pipeline = get_pipeline(input_path, epsg)
+    metadata = get_input_las_metadata(pipeline)
     assert metadata["minor_version"] == 4
     assert metadata["dataformat_id"] == 8
-    # Ensure that the final spatial reference is French CRS Lambert-93
-    assert "Lambert-93" in metadata["spatialreference"]
+    # Ensure that the final spatial reference is the same as in the config (if provided)
+    metadata_crs = metadata["srs"]["compoundwkt"]
+    assert metadata_crs
+    if epsg:
+        expected_crs = pyproj.crs.CRS(epsg)
+        assert expected_crs.equals(metadata_crs)
 
 
 @pytest.mark.parametrize(
@@ -156,3 +194,21 @@ def test_get_shapefile(hydra_cfg):
         os.path.splitext(os.path.basename(LAS_SUBSET_FILE_BUILDING))[0] + ".shp",
     )
     assert os.path.exists(created_shapefile_path)
+    gdf = geopandas.read_file(created_shapefile_path)
+    assert len(gdf.index) > 0
+
+
+def test_get_shapefile_epsg_5490(hydra_cfg):
+    # Update EPSG in configuration for this test only
+    hydra_cfg_local = hydra_cfg.copy()
+    hydra_cfg_local.data_format.epsg = 5490
+
+    destination_path = tempfile.NamedTemporaryFile().name
+    get_shapefile(hydra_cfg_local, LAS_FILE_BUILDING_5490, destination_path)
+    created_shapefile_path = os.path.join(
+        os.path.dirname(destination_path),
+        os.path.splitext(os.path.basename(LAS_FILE_BUILDING_5490))[0] + ".shp",
+    )
+    assert os.path.exists(created_shapefile_path)
+    gdf = geopandas.read_file(created_shapefile_path)
+    assert len(gdf.index) > 0
diff --git a/tests/lidar_prod/test_optimization.py b/tests/lidar_prod/test_optimization.py
index f56d2f20..f4d4e009 100644
--- a/tests/lidar_prod/test_optimization.py
+++ b/tests/lidar_prod/test_optimization.py
@@ -92,7 +92,7 @@ def test_BVOptimization_on_subset(hydra_cfg):
     bvo.bv.use_final_classification_codes = True
     bvo.update()
     assert os.path.isfile(updated_las_path)
-    arr = pdal_read_las_array(updated_las_path)
+    arr = pdal_read_las_array(updated_las_path, hydra_cfg.data_format.epsg)
     # Check that we have either 1/2 (ground/unclassified), or one of
     # the final classification code of the module.
     final_codes = hydra_cfg.data_format.codes.building.final