Commit

feat(import,synthese): add the possibility to declare jdd in the import file

- Alembic revision to add the columns unique_dataset_id and src_unique_dataset_id
- Add the new fields into bib_fields and cor_entity_field
- Change the "Synthese GeoNature" field mapping to add unique_dataset_id (see the sketch below for the dataset-resolution idea)
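
For context, a minimal pandas sketch of the dataset-resolution idea behind the new check_datasets step (an illustration only, not the actual GeoNature implementation; the lookup dictionaries below are made up, while the column names mirror those added by the migration): the user-provided JDD UUID is parsed, resolved to id_dataset when the dataset is known and authorized, and otherwise flagged with errors comparable to INVALID_UUID, DATASET_NOT_FOUND and DATASET_NOT_AUTHORIZED.

```python
# Hedged illustration only — not the actual GeoNature check_datasets() code.
import uuid

import pandas as pd

# Hypothetical lookup tables (UUID hex -> id_dataset); the real code queries TDatasets.
authorized = {"4d331cae65e44948b0b2a11bc5bb46c2": 1}
known = {**authorized, "5937d0f2c96d424bbea49e3fdac894ed": 2}  # exists but not importable

df = pd.DataFrame(
    {
        "src_unique_dataset_id": [
            "4d331cae-65e4-4948-b0b2-a11bc5bb46c2",  # authorized -> id_dataset filled
            "5937d0f2-c96d-424b-bea4-9e3fdac894ed",  # known but forbidden
            "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",  # well-formed UUID, unknown dataset
            "not-a-uuid",                            # malformed UUID
            None,                                    # empty -> the import's selected dataset applies
        ]
    }
)

def parse_uuid(value):
    try:
        return uuid.UUID(value).hex if value else None
    except (TypeError, ValueError):
        return None

df["unique_dataset_id"] = df["src_unique_dataset_id"].map(parse_uuid)
df["invalid_uuid"] = df["src_unique_dataset_id"].notna() & df["unique_dataset_id"].isna()
df["id_dataset"] = df["unique_dataset_id"].map(authorized)
df["dataset_not_found"] = df["unique_dataset_id"].notna() & ~df["unique_dataset_id"].isin(set(known))
df["dataset_not_authorized"] = df["unique_dataset_id"].isin(set(known)) & ~df["unique_dataset_id"].isin(set(authorized))
print(df[["id_dataset", "invalid_uuid", "dataset_not_found", "dataset_not_authorized"]])
```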

Reviewed-by: andriacap

fix: change type of "eng_label" and "desc_label"

In the database these two fields are nullable, so change the type to allow a "None" value.
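
As a small illustration of what this allows (a hedged sketch using the standalone jsonschema library with a made-up instance, not the project's actual test schema): a property typed ["string", "null"] accepts both a string and None.

```python
# Hedged sketch: a property typed ["string", "null"] validates both a string and None.
from jsonschema import validate

schema = {
    "type": "object",
    "properties": {
        "eng_label": {"type": ["string", "null"]},
    },
}

validate({"eng_label": "Dataset UUID"}, schema)  # passes
validate({"eng_label": None}, schema)            # also passes once "null" is allowed
```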

Reviewed-by: andriacap

fix(test): errors with unique_dataset_id

Fix test errors caused by the addition of unique_dataset_id

Reviewed-by: andriacap

fix: change revision id (after rebase on feat/import)

Reviewed-by: andriacap

fix: nomenclature raiseload error
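
This likely corresponds to the joinedload() added before raiseload("*") in checks/dataframe/core.py further down. A generic SQLAlchemy sketch of the pattern, with toy models rather than GeoNature's own: under raiseload("*"), touching any relationship that was not explicitly loaded raises an error, so relationships accessed later must be eagerly loaded in the same query.

```python
# Hedged, generic SQLAlchemy sketch (toy models, not GeoNature's) of why a
# joinedload() is needed before raiseload("*").
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base, joinedload, raiseload, relationship

Base = declarative_base()

class Nomenclature(Base):
    __tablename__ = "nomenclature"
    id = Column(Integer, primary_key=True)
    label = Column(String)

class Dataset(Base):
    __tablename__ = "dataset"
    id = Column(Integer, primary_key=True)
    id_nomenclature = Column(ForeignKey("nomenclature.id"))
    nomenclature_data_origin = relationship(Nomenclature)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Dataset(nomenclature_data_origin=Nomenclature(label="Collecte terrain")))
    session.commit()

    # raiseload("*") blocks lazy loading of every relationship not listed explicitly;
    # without the joinedload, accessing nomenclature_data_origin below would raise.
    ds = (
        session.query(Dataset)
        .options(joinedload(Dataset.nomenclature_data_origin), raiseload("*"))
        .one()
    )
    print(ds.nomenclature_data_origin.label)  # safe: eagerly loaded by the joinedload
```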

Reviewed-by: andriac

refact: test synthese import jdd

Based on Elie's review

Reviewed-by: andriac

add missing line of code in synthese mixin
andriacap authored and jacquesfize committed Jul 25, 2024
1 parent e6282af commit 4087502
Showing 6 changed files with 282 additions and 11 deletions.
@@ -1,5 +1,6 @@
from math import ceil


from geonature.core.imports.import_mixin import ImportMixin, ImportStatisticsLabels, ImportInputUrl

from apptax.taxonomie.models import Taxref
@@ -23,6 +24,7 @@
check_types,
check_geography,
check_counts,
check_datasets,
)
from geonature.core.imports.checks.sql import (
do_nomenclatures_mapping,
@@ -97,7 +99,7 @@ def check_transient_data(task, logger, imprt: TImports):
def update_batch_progress(batch, step):
start = 0.1
end = 0.4
step_count = 7
step_count = 8
progress = start + ((batch + 1) / batch_count) * (step / step_count) * (end - start)
task.update_state(state="PROGRESS", meta={"progress": progress})

@@ -141,6 +143,17 @@ def update_batch_progress(batch, step):
updated_cols |= check_types(imprt, entity, df, fields)
update_batch_progress(batch, 4)

logger.info(f"[{batch+1}/{batch_count}] Check dataset rows")
with start_sentry_child(op="check.df", description="check datasets rows"):
updated_cols |= check_datasets(
imprt,
entity,
df,
uuid_field=fields["unique_dataset_id"],
id_field=fields["id_dataset"],
module_code="SYNTHESE",
)
update_batch_progress(batch, 5)
logger.info(f"[{batch+1}/{batch_count}] Check geography…")
with start_sentry_child(op="check.df", description="set geography"):
updated_cols |= check_geography(
@@ -157,7 +170,7 @@ def update_batch_progress(batch, step):
codemaille_field=fields["codemaille"],
codedepartement_field=fields["codedepartement"],
)
update_batch_progress(batch, 5)
update_batch_progress(batch, 6)

logger.info(f"[{batch+1}/{batch_count}] Check counts…")
with start_sentry_child(op="check.df", description="check count"):
@@ -169,12 +182,12 @@ def update_batch_progress(batch, step):
fields["count_max"],
default_count=current_app.config["IMPORT"]["DEFAULT_COUNT_VALUE"],
)
update_batch_progress(batch, 6)
update_batch_progress(batch, 7)

logger.info(f"[{batch+1}/{batch_count}] Updating import data from dataframe…")
with start_sentry_child(op="check.df", description="save dataframe"):
update_transient_data_from_dataframe(imprt, entity, updated_cols, df)
update_batch_progress(batch, 7)
update_batch_progress(batch, 8)

# Checks in SQL
convert_geom_columns(
@@ -335,6 +348,8 @@ def import_data_to_destination(imprt: TImports) -> None:
if source_field in imprt.columns:
insert_fields |= {field}

insert_fields -= {fields["unique_dataset_id"]} # Column only used for filling `id_dataset`

select_stmt = (
sa.select(
*[transient_table.c[field.dest_field] for field in insert_fields],
1 change: 1 addition & 0 deletions backend/geonature/core/imports/checks/dataframe/core.py
@@ -229,6 +229,7 @@ def check_datasets(
datasets = {
ds.unique_dataset_id.hex: ds
for ds in TDatasets.query.filter(TDatasets.unique_dataset_id.in_(uuid))
.options(sa.orm.joinedload(TDatasets.nomenclature_data_origin))
.options(sa.orm.raiseload("*"))
.all()
}
@@ -0,0 +1,219 @@
"""add column unique_dataset_id to t_imports_synthese and insert into bib_fields and cor_entity_field
Revision ID: 6e1852ecfea2
Revises: 8b149244d586
Create Date: 2024-03-04 12:31:00.861460
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.schema import Table, MetaData

# revision identifiers, used by Alembic.
revision = "6e1852ecfea2"
down_revision = "8b149244d586"
branch_labels = None
depends_on = None


def upgrade():
meta = MetaData(bind=op.get_bind())

# Add columns to t_imports_synthese table
with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
batch_op.add_column(sa.Column("src_unique_dataset_id", sa.String))
batch_op.add_column(sa.Column("unique_dataset_id", UUID(as_uuid=True)))
batch_op.add_column(sa.Column("id_dataset", sa.Integer))
# Fetch id_destination for 'synthese' from bib_destinations table
destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
id_dest_synthese = (
op.get_bind()
.execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
.scalar()
)
# Fetch id_entity_observation for id_destination from bib_entities table
entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
id_entity_observation = (
op.get_bind()
.execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
.scalar()
)

# Fetch id_theme_general from bib_themes table
theme = Table("bib_themes", meta, autoload=True, schema="gn_imports")
id_theme_general = (
op.get_bind()
.execute(sa.select([theme.c.id_theme]).where(theme.c.name_theme == "general_info"))
.scalar()
)

# Fetch id_field for 'unique_dataset_id' from bib_fields table
field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
list_field_to_insert = [
(
{
"name_field": "unique_dataset_id",
"fr_label": "Identifiant JDD (UUID)",
"mandatory": False,
"autogenerated": False,
"display": True,
"mnemonique": None,
"source_field": "src_unique_dataset_id",
"dest_field": "unique_dataset_id",
},
{
id_entity_observation: {
"id_theme": id_theme_general,
"order_field": 3,
"comment": "Correspondance champs standard: metadonneeId ou jddMetaId",
},
},
),
(
{
"name_field": "id_dataset",
"fr_label": "Identifiant JDD",
"mandatory": False,
"autogenerated": False,
"display": False,
"mnemonique": None,
"source_field": None,
"dest_field": "id_dataset",
},
{
id_entity_observation: {
"id_theme": id_theme_general,
"order_field": 3,
"comment": "",
},
},
),
]
# insert_data = {"id_destination": id_dest_synthese, **field_unique_dataset_id_info}

id_fields = [
id_field
for id_field, in op.get_bind()
.execute(
sa.insert(field)
.values(
[{"id_destination": id_dest_synthese, **field} for field, _ in list_field_to_insert]
)
.returning(field.c.id_field)
)
.fetchall()
]

# Insert data into cor_entity_field table
cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
op.execute(
sa.insert(cor_entity_field).values(
[
{"id_entity": id_entity, "id_field": id_field, **props}
for id_field, field_entities in zip(id_fields, list_field_to_insert)
for id_entity, props in field_entities[1].items()
]
)
)

# Update the "Synthese GeoNature" field mapping to add unique_dataset_id
t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")

id_t_mapping_synthese = (
op.get_bind()
.execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
.scalar()
)

update_query = sa.text(
"""
UPDATE gn_imports.t_fieldmappings
SET values = values::jsonb || '{"unique_dataset_id": "unique_dataset_id"}'::jsonb
WHERE id = :id_t_mapping_synthese
"""
)

op.get_bind().execute(update_query, id_t_mapping_synthese=id_t_mapping_synthese)


def downgrade():
meta = MetaData(bind=op.get_bind())

# Drop columns from t_imports_synthese table
with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
batch_op.drop_column("unique_dataset_id")
batch_op.drop_column("src_unique_dataset_id")
batch_op.drop_column("id_dataset")

# Fetch id_destination for 'synthese' from bib_destinations table
destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
id_dest_synthese = (
op.get_bind()
.execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
.scalar()
)

# Fetch id_entity_observation for id_destination from bib_entities table
entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
id_entity_observation = (
op.get_bind()
.execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
.scalar()
)

# Fetch id_fields inserted into bib_fields table
field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
id_fields = (
op.get_bind()
.execute(
sa.select([field.c.id_field]).where(
sa.or_(
sa.and_(
field.c.name_field == "unique_dataset_id",
field.c.id_destination == id_dest_synthese,
),
sa.and_(
field.c.name_field == "id_dataset",
field.c.id_destination == id_dest_synthese,
),
)
)
)
.scalars()
.all()
)

# Delete rows from cor_entity_field based on matching list of id_fields
cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
op.execute(
cor_entity_field.delete().where(
sa.and_(
cor_entity_field.c.id_entity == id_entity_observation,
cor_entity_field.c.id_field.in_(id_fields),
)
)
)

op.execute(field.delete().where(field.c.id_field.in_(id_fields)))

t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")

# Get the ID of the "Synthese GeoNature" mapping
id_t_mapping_synthese = (
op.get_bind()
.execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
.scalar()
)

revert_query = sa.text(
"""
UPDATE gn_imports.t_fieldmappings
SET values = values::jsonb - 'unique_dataset_id'
WHERE id = :id_t_mapping_synthese
"""
)

op.get_bind().execute(revert_query, id_t_mapping_synthese=id_t_mapping_synthese)
@@ -0,0 +1,5 @@
error;id_synthese;id_origine;comment_releve;comment_occurrence;date_debut;date_fin;heure_debut;heure_fin;cd_nom;cd_ref;nom_valide;nom_vernaculaire;nom_cite;regne;group1_inpn;group2_inpn;classe;ordre;famille;rang_taxo;nombre_min;nombre_max;alti_min;alti_max;prof_min;prof_max;observateurs;determinateur;communes;geometrie_wkt_4326;x_centroid_4326;y_centroid_4326;nom_lieu;validateur;niveau_validation;date_validation;comment_validation;preuve_numerique_url;preuve_non_numerique;jdd_nom;jdd_uuid;jdd_id;ca_nom;ca_uuid;ca_id;cd_habref;cd_habitat;nom_habitat;precision_geographique;nature_objet_geo;type_regroupement;methode_regroupement;technique_observation;biologique_statut;etat_biologique;biogeographique_statut;naturalite;preuve_existante;niveau_precision_diffusion;stade_vie;sexe;objet_denombrement;type_denombrement;niveau_sensibilite;statut_observation;floutage_dee;statut_source;type_info_geo;methode_determination;comportement;reference_biblio;uuid_perm_sinp;uuid_perm_grp_sinp;date_creation;date_modification;unique_dataset_id
valid;1;1;Relevé n°1;Occurrence n°1;2017-01-01;2017-01-01;12:05:02;12:05:02;60612;60612;Lynx lynx (Linnaeus, 1758);;Lynx Boréal;Animalia;Chordés;Mammifères;Mammalia;Carnivora;Felidae;ES;5;5;1500;1565;;;Administrateur test;Gil;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poil;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Adulte;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;b4f85a2e-dd88-4cdd-aa86-f1c7370faf3f;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;VALID_DATASET_UUID
valid;2;2;Relevé n°2;Occurrence n°2;2017-01-01;2017-01-02;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1500;1565;;;Administrateur test;Théo;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Immature;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;830c93c7-288e-40f0-a17f-15fbb50e643a;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;
DATASET_NOT_AUTHORIZED(unique_dataset_id);3;3;Relevé n°3;Occurrence n°3;2017-01-08;;;;67111;67111;Alburnus alburnus (Linnaeus, 1758);Ablette;Ablette;Animalia;Chordés;Poissons;Actinopterygii;Cypriniformes;Leuciscidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;2f92f91a-64a2-4684-90e4-140466bb34e3;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;FORBIDDEN_DATASET_UUID
INVALID_UUID(unique_dataset_id);6;6;Relevé n°6;Occurrence n°6;2017-01-01;2017-01-01;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;f5515e2a-b30d-11eb-8cc8-af8c2d0867b4;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;050d613c-543f-47fd-800a-13931b2721c7
10 changes: 8 additions & 2 deletions backend/geonature/tests/imports/jsonschema_definitions.py
@@ -24,10 +24,16 @@
"type": "string",
},
"eng_label": {
"type": "string",
"type": [
"string",
"null",
],
},
"desc_field": {
"type": "string",
"type": [
"string",
"null",
],
},
"type_field": {
"type": "string",
35 changes: 30 additions & 5 deletions backend/geonature/tests/imports/test_imports_synthese.py
@@ -1,4 +1,4 @@
from io import StringIO
from io import StringIO, BytesIO
from pathlib import Path
from functools import partial
from operator import or_
@@ -143,11 +143,24 @@ def new_import(synthese_destination, users, import_dataset):


@pytest.fixture()
def uploaded_import(new_import, import_file_name):
def uploaded_import(new_import, datasets, import_file_name):
with db.session.begin_nested():
with open(tests_path / "files" / "synthese" / import_file_name, "rb") as f:
new_import.source_file = f.read()
new_import.full_file_name = "valid_file.csv"
f.seek(0)
content = f.read()
if import_file_name == "jdd_to_import_file.csv":
content = content.replace(
b"VALID_DATASET_UUID",
datasets["own_dataset"].unique_dataset_id.hex.encode("ascii"),
)
content = content.replace(
b"FORBIDDEN_DATASET_UUID",
datasets["orphan_dataset"].unique_dataset_id.hex.encode("ascii"),
)
new_import.full_file_name = "jdd_to_import_file.csv"
else:
new_import.full_file_name = "valid_file.csv"
new_import.source_file = content
return new_import


@@ -174,7 +187,7 @@ def decoded_import(client, uploaded_import):

@pytest.fixture()
def fieldmapping(import_file_name, autogenerate):
if import_file_name == "valid_file.csv":
if import_file_name in ["valid_file.csv", "jdd_to_import_file.csv"]:
return FieldMapping.query.filter_by(label="Synthese GeoNature").one().values
else:
return {
@@ -1271,3 +1284,15 @@ def test_import_compare_error_line_with_csv(self, users, imported_import, import
assert int(source_row["line_number"]) == erroneous_line_number
# and this is the test purpose assert:
assert error_row == source_row

@pytest.mark.parametrize("import_file_name", ["jdd_to_import_file.csv"])
def test_import_jdd_file(self, imported_import):
assert_import_errors(
imported_import,
{
# id_dataset errors
# Line 2 should not be an error (it should use the default selected JDD)
("DATASET_NOT_AUTHORIZED", "unique_dataset_id", frozenset({2, 4})),
("DATASET_NOT_FOUND", "unique_dataset_id", frozenset({5})),
},
)
