IGNF · leavauchier · Apr 11, 2024 · Apr 11, 2024
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 99
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,21 +17,22 @@ repos:
       - id: black
         args: [--line-length, "99"]
 
+  # python code analysis
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.0.0
+    hooks:
+      - id: flake8
+
   # python import sorting
   - repo: https://github.com/PyCQA/isort
     rev: 5.13.2
     hooks:
       - id: isort
+        args: ["--profile", "black", --line-length, "99"]
 
   # yaml formatting
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         types: [yaml]
-
-  # python code analysis
-  - repo: https://github.com/PyCQA/flake8
-    rev: 7.0.0
-    hooks:
-      - id: flake8
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ The Lidar HD project ambitions to map France in 3D using 10 pulse/m² aerial Lid
 
 To produce this classification, geometric rule- based classification are familiar and present advantages such as scalability, high geometric regularity, and predictability. But rule- based algorithm often lack the fine-grain understanding needed for complex Lidar scenes, which results in a need for time-consuming human correction.
 
-Additionnaly, some valuable information exist in 2D public geographical database, but finding a way to leverage it on a point cloud classification is not straightforward considering database incompletness, potential out-of-dateness, and frequent x-y offsets. 
+Additionally, some valuable information exists in 2D public geographical database, but finding a way to leverage it on a point cloud classification is not straightforward considering database incompleteness, potential out-of-dateness, and frequent x-y offsets.
 
 Considering the scale of this task, deep learning is leveraged to as a production tool. A [deep learning library](https://github.com/IGNF/lidar-deep-segmentation) was developed with a focused scope: the multiclass semantic segmentation of large scale, high density aerial Lidar points cloud. Using a classification produced directly by a deep learning model might be tempting, but they usually presents some limitations including unexpected failure modes, inconsistant geometric regularity, noise.
 
@@ -33,7 +33,7 @@ Our strategy is to fuse together different sources of informations (rule- based
 Right now, the class `building` is the only one that is addressed. The extension to other classes is dependent on the training of multiclass AI model, which requires high quality training datasets that are currently being produced.
 
 > Please refer to the documentation for [installation and usage](https://ignf.github.io/lidar-prod/tutorials/install.html).
-    
+
 > Please refer to the documentation to understand the [production process](https://ignf.github.io/lidar-prod/background/production_process.html).
 
 ## Version

diff --git a/configs/bd_uni_connection_params/credentials_template.yaml b/configs/bd_uni_connection_params/credentials_template.yaml
@@ -2,4 +2,4 @@ _target_: lidar_prod.tasks.utils.BDUniConnectionParams
 host: serveurbdudiff.ign.fr
 user:
 pwd:
-bd_name: bduni_france_consultation
+bd_name: bduni_france_consultation
diff --git a/configs/building_completion/default.yaml b/configs/building_completion/default.yaml
@@ -6,6 +6,5 @@ min_building_proba: 0.5
 
 cluster:
   min_points: 10 # including isolated points (in BuildingValidator) and confirmed candidates points.
-  tolerance: 0.3  # meters, small to capture building superstructures only
-  is3d: false  # group in 2d for better detection
-
+  tolerance: 0.3 # meters, small to capture building superstructures only
+  is3d: false # group in 2d for better detection
diff --git a/configs/building_identification/default.yaml b/configs/building_identification/default.yaml
@@ -5,7 +5,6 @@ data_format: ${data_format}
 min_building_proba: 0.5
 
 cluster:
-  min_points: 200  # Large so that small isolated artefacts are ignored
-  tolerance: 0.75  # meters
-  is3d: false  # group in 2d for better detection
-
+  min_points: 200 # Large so that small isolated artefacts are ignored
+  tolerance: 0.75 # meters
+  is3d: false # group in 2d for better detection
diff --git a/configs/building_validation/application/default.yaml b/configs/building_validation/application/default.yaml
@@ -6,19 +6,19 @@ use_final_classification_codes: true
 shp_path: null
 
 cluster:
-  tolerance: 0.5  # meters
+  tolerance: 0.5 # meters
   min_points: 10
 
 bd_uni_request:
   buffer: 50
 
 # Associated Version(s) : M10.0-proto151_V1.0_epoch_40_Myria3DV3.0.1-proto151optimization
 thresholds:
-  min_confidence_confirmation: 0.7489066375339118  # min proba to validate a point
-  min_frac_confirmation: 0.16236610677624053  # min fractin of confirmed points per group for confirmation
-  min_frac_confirmation_factor_if_bd_uni_overlay: 0.5532221883488597  # relaxation factor to min proba when point is under BDUni vector
-  min_uni_db_overlay_frac: 0.7243937589483613  # min fraction of points  under BDUni vector per group for confirmation
+  min_confidence_confirmation: 0.7489066375339118 # min proba to validate a point
+  min_frac_confirmation: 0.16236610677624053 # min fraction of confirmed points per group for confirmation
+  min_frac_confirmation_factor_if_bd_uni_overlay: 0.5532221883488597 # relaxation factor to min proba when point is under BDUni vector
+  min_uni_db_overlay_frac: 0.7243937589483613 # min fraction of points under BDUni vector per group for confirmation
   min_confidence_refutation: 0.9753597180902244 # min proba to refute a point
-  min_frac_refutation: 0.30759538271378295   # min fractin of refuted points per group for confirmation
-  min_entropy_uncertainty: 1.254212461691427   # min entropy to flag a point as uncertain
-  min_frac_entropy_uncertain: 0.7343497391001854   # min fractin of uncertain points (based on entropy) per group to flag as uncertain
+  min_frac_refutation: 0.30759538271378295 # min fraction of refuted points per group for confirmation
+  min_entropy_uncertainty: 1.254212461691427 # min entropy to flag a point as uncertain
+  min_frac_entropy_uncertain: 0.7343497391001854 # min fraction of uncertain points (based on entropy) per group to flag as uncertain
diff --git a/configs/building_validation/default.yaml b/configs/building_validation/default.yaml
@@ -1,3 +1,3 @@
 defaults:
   - application: default.yaml
-  - optimization: default.yaml
+  - optimization: default.yaml
diff --git a/configs/building_validation/optimization/default.yaml b/configs/building_validation/optimization/default.yaml
@@ -1,40 +1,38 @@
 _target_: lidar_prod.tasks.building_validation_optimization.BuildingValidationOptimizer
 
-todo: "prepare+optimize+evaluate+update"  # for test data use prepare+evaluate+update
+todo: "prepare+optimize+evaluate+update" # for test data use prepare+evaluate+update
 
 # By default we update corrected data and keep detailed codes instead of final ones.
 # This enables inspection of updated files post-optimization, with maximum details.
 use_final_classification_codes: false
 building_validator: ${building_validation.application}
 
 paths:
-  input_las_dir:  "/path/to/folder/"  # contains .las/.laz files
-  results_output_dir: "/path/to/folder/"  # will contain best optimization trial and (optionnaly) updated las
+  input_las_dir: "/path/to/folder/" # contains .las/.laz files
+  results_output_dir: "/path/to/folder/" # will contain best optimization trial and (optionally) updated las
   group_info_pickle_path: ${.results_output_dir}/group_info.pickle
   prepared_las_dir: ${.results_output_dir}/prepared/
   updated_las_dir: ${.results_output_dir}/updated/
   building_validation_thresholds_pickle: ${.results_output_dir}/optimized_thresholds.pickle # Wher
 
-
-# CLASSIFICATION CODES of a dataset which was inspected 
+# CLASSIFICATION CODES of a dataset which was inspected
 # and labeled post TerraSolid macro
 # Those are used to override the app default codes.
 buildings_correction_labels:
   codes:
     true_positives: [19] # building that was found by the macro
     false_positives: [20, 110, 112, 114, 115] # e.g. trees, hedge, trucks
-    false_negatives: [21]  # e.g. buildings under vegetation, low building
+    false_negatives: [21] # e.g. buildings under vegetation, low building
   # Sometimes a cluster will be ambiguous and we need
   # thresholds to decice if it is a TP or FP.
   min_frac:
     true_positives: 0.95 # >=x% of confirmed points --> cluster is a building
     false_positives: 0.05 # <x% of confirmed points --> cluster is not a building
 
-
 study:
   _target_: optuna.create_study
   study_name: "auto_precision_recall"
-  directions: ["maximize","maximize","maximize"]
+  directions: ["maximize", "maximize", "maximize"]
   sampler:
     _target_: optuna.samplers.NSGAIISampler
     population_size: 50
@@ -74,5 +72,3 @@ design:
     # constraints:
     refutation_accuracy: "a_refute"
     confirmation_accuracy: "a_confirm"
-
-
diff --git a/configs/building_validation/optimization/pytest.yaml b/configs/building_validation/optimization/pytest.yaml
@@ -2,4 +2,4 @@ defaults:
   - default.yaml
 
 design:
-  n_trials: 4
+  n_trials: 4
diff --git a/configs/config.yaml b/configs/config.yaml
@@ -11,4 +11,4 @@ defaults:
   - building_completion: default.yaml
   - basic_identification: default.yaml
   - bd_uni_connection_params: credentials.yaml
-  - _self_  # needed by pdal for legacy reasons
+  - _self_ # needed by pdal for legacy reasons
diff --git a/configs/data_format/default.yaml b/configs/data_format/default.yaml
@@ -4,27 +4,26 @@ epsg: 2154
 # Those names connect the logics between successive tasks
 las_dimensions:
   # input
-  classification: Classification  # las format
+  classification: Classification # las format
   terrascan_normal: Normal
   terrascan_distance: Distance
   terrascan_deviation: Deviation
-  terrascan_reliability: confidence  # Appears as "Reliability" in TerraScan. Unsigned char according to LASinfo.
+  terrascan_reliability: confidence # Appears as "Reliability" in TerraScan. Unsigned char according to LASinfo.
 
   # Extra dims
   # ATTENTION: If extra dimensions are added, you may want to add them in cleaning.in parameter as well.
   ai_vegetation_proba: vegetation
   ai_unclassified_proba: unclassified
-  ai_building_proba: building  # user-defined - output by deep learning model
+  ai_building_proba: building # user-defined - output by deep learning model
   entropy: entropy # user-defined - output by deep learning model
 
-
   # Intermediary channels
-  cluster_id: ClusterID  # pdal-defined -> created by clustering operations
-  uni_db_overlay: BDTopoOverlay  # user-defined -> a 0/1 flag for presence of a BDUni vector
+  cluster_id: ClusterID # pdal-defined -> created by clustering operations
+  uni_db_overlay: BDTopoOverlay # user-defined -> a 0/1 flag for presence of a BDUni vector
   candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rule- based classification
-  ClusterID_candidate_building: CID_CandidateB  # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
-  ClusterID_confirmed_or_high_proba: CID_IsolatedOrConfirmed  # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
-  completion_non_candidate_flag: F_NonCandidateCompletion  # --> a 0/1 flag for non candidates points with high proba and close to confirmed buildings
+  ClusterID_candidate_building: CID_CandidateB # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
+  ClusterID_confirmed_or_high_proba: CID_IsolatedOrConfirmed # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
+  completion_non_candidate_flag: F_NonCandidateCompletion # --> a 0/1 flag for non candidates points with high proba and close to confirmed buildings
 
   # Additionnal output channel
   ai_building_identified: Group
@@ -34,34 +33,57 @@ codes:
   unclassified: 1
   vegetation: 3
   vegetation_target:
-      vegetation_low: 3
-      vegetation_medium: 4
-      vegetation_high: 5
+    vegetation_low: 3
+    vegetation_medium: 4
+    vegetation_high: 5
   building:
-    candidates: [202]  # found by rule- based classification (TerraScan)
-    detailed:  # used for detailed output when doing threshold optimization
+    candidates: [202] # found by rule- based classification (TerraScan)
+    detailed: # used for detailed output when doing threshold optimization
       unsure_by_entropy: 200 # unsure (based on entropy)
-      unclustered: 202  # refuted
-      ia_refuted: 110  # refuted
-      ia_refuted_but_under_db_uni: 111  # unsure
-      both_unsure: 112  # unsure (otherwise)
-      ia_confirmed_only: 113  # confirmed
-      db_overlayed_only: 114  # confirmed
-      both_confirmed: 115  # confirmed
-    final:  # used at the end of the building process
-      unsure: 214  # unsure
-      not_building: 208  # refuted
-      building: 6  # confirmed
+      unclustered: 202 # refuted
+      ia_refuted: 110 # refuted
+      ia_refuted_but_under_db_uni: 111 # unsure
+      both_unsure: 112 # unsure (otherwise)
+      ia_confirmed_only: 113 # confirmed
+      db_overlayed_only: 114 # confirmed
+      both_confirmed: 115 # confirmed
+    final: # used at the end of the building process
+      unsure: 214 # unsure
+      not_building: 208 # refuted
+      building: 6 # confirmed
     detailed_to_final:
-      - ["${data_format.codes.building.detailed.unclustered}","${data_format.codes.building.final.not_building}"]
-      - ["${data_format.codes.building.detailed.ia_refuted}","${data_format.codes.building.final.not_building}"]
-      - ["${data_format.codes.building.detailed.ia_refuted_but_under_db_uni}","${data_format.codes.building.final.unsure}"]
-      - ["${data_format.codes.building.detailed.both_unsure}","${data_format.codes.building.final.unsure}"]
-      - ["${data_format.codes.building.detailed.unsure_by_entropy}","${data_format.codes.building.final.unsure}"]
-      - ["${data_format.codes.building.detailed.ia_confirmed_only}","${data_format.codes.building.final.building}"]
-      - ["${data_format.codes.building.detailed.db_overlayed_only}","${data_format.codes.building.final.building}"]
-      - ["${data_format.codes.building.detailed.both_confirmed}","${data_format.codes.building.final.building}"]
-
+      - [
+          "${data_format.codes.building.detailed.unclustered}",
+          "${data_format.codes.building.final.not_building}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.ia_refuted}",
+          "${data_format.codes.building.final.not_building}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.ia_refuted_but_under_db_uni}",
+          "${data_format.codes.building.final.unsure}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.both_unsure}",
+          "${data_format.codes.building.final.unsure}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.unsure_by_entropy}",
+          "${data_format.codes.building.final.unsure}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.ia_confirmed_only}",
+          "${data_format.codes.building.final.building}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.db_overlayed_only}",
+          "${data_format.codes.building.final.building}",
+        ]
+      - [
+          "${data_format.codes.building.detailed.both_confirmed}",
+          "${data_format.codes.building.final.building}",
+        ]
 
 cleaning:
   # Extra dims that are kept when application starts. Others are removed to lighten the LAS.