diff --git a/backend/geonature/core/gn_synthese/imports/synthese_import_mixin.py b/backend/geonature/core/gn_synthese/imports/synthese_import_mixin.py
index 4a72865440..ffa25be91d 100644
--- a/backend/geonature/core/gn_synthese/imports/synthese_import_mixin.py
+++ b/backend/geonature/core/gn_synthese/imports/synthese_import_mixin.py
@@ -1,5 +1,6 @@
 from math import ceil
+
 from geonature.core.imports.import_mixin import ImportMixin, ImportStatisticsLabels, ImportInputUrl
 
 from apptax.taxonomie.models import Taxref
 
@@ -23,6 +24,7 @@
     check_types,
     check_geography,
     check_counts,
+    check_datasets,
 )
 from geonature.core.imports.checks.sql import (
     do_nomenclatures_mapping,
@@ -97,7 +99,7 @@ def check_transient_data(task, logger, imprt: TImports):
     def update_batch_progress(batch, step):
         start = 0.1
         end = 0.4
-        step_count = 7
+        step_count = 8
         progress = start + ((batch + 1) / batch_count) * (step / step_count) * (end - start)
         task.update_state(state="PROGRESS", meta={"progress": progress})
 
@@ -141,6 +143,17 @@ def update_batch_progress(batch, step):
         updated_cols |= check_types(imprt, entity, df, fields)
         update_batch_progress(batch, 4)
 
+        logger.info(f"[{batch+1}/{batch_count}] Check datasets…")
+        with start_sentry_child(op="check.df", description="check datasets"):
+            updated_cols |= check_datasets(
+                imprt,
+                entity,
+                df,
+                uuid_field=fields["unique_dataset_id"],
+                id_field=fields["id_dataset"],
+                module_code="SYNTHESE",
+            )
+        update_batch_progress(batch, 5)
         logger.info(f"[{batch+1}/{batch_count}] Check geography…")
         with start_sentry_child(op="check.df", description="set geography"):
             updated_cols |= check_geography(
@@ -157,7 +170,7 @@ def update_batch_progress(batch, step):
                 codemaille_field=fields["codemaille"],
                 codedepartement_field=fields["codedepartement"],
             )
-        update_batch_progress(batch, 5)
+        update_batch_progress(batch, 6)
         logger.info(f"[{batch+1}/{batch_count}] Check counts…")
         with start_sentry_child(op="check.df", description="check count"):
             updated_cols |= check_counts(
@@ -169,12 +182,12 @@ def update_batch_progress(batch, step):
                 fields["count_max"],
                 default_count=current_app.config["IMPORT"]["DEFAULT_COUNT_VALUE"],
             )
-        update_batch_progress(batch, 6)
+        update_batch_progress(batch, 7)
 
         logger.info(f"[{batch+1}/{batch_count}] Updating import data from dataframe…")
         with start_sentry_child(op="check.df", description="save dataframe"):
             update_transient_data_from_dataframe(imprt, entity, updated_cols, df)
-        update_batch_progress(batch, 7)
+        update_batch_progress(batch, 8)
 
     # Checks in SQL
     convert_geom_columns(
@@ -335,6 +348,8 @@ def import_data_to_destination(imprt: TImports) -> None:
         if source_field in imprt.columns:
            insert_fields |= {field}
 
+    insert_fields -= {fields["unique_dataset_id"]}  # column only used to fill `id_dataset`
+
     select_stmt = (
         sa.select(
             *[transient_table.c[field.dest_field] for field in insert_fields],
diff --git a/backend/geonature/core/imports/checks/dataframe/core.py b/backend/geonature/core/imports/checks/dataframe/core.py
index f80393cf04..de644404fa 100644
--- a/backend/geonature/core/imports/checks/dataframe/core.py
+++ b/backend/geonature/core/imports/checks/dataframe/core.py
@@ -229,6 +229,7 @@
     datasets = {
         ds.unique_dataset_id.hex: ds
         for ds in TDatasets.query.filter(TDatasets.unique_dataset_id.in_(uuid))
+        .options(sa.orm.joinedload(TDatasets.nomenclature_data_origin))
        .options(sa.orm.raiseload("*"))
        .all()
     }
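For context, here is a minimal standalone sketch of what a check in the spirit of `check_datasets` does: resolve each row's dataset UUID to an `id_dataset` and emit the error codes exercised by the tests below (`INVALID_UUID`, `DATASET_NOT_FOUND`, `DATASET_NOT_AUTHORIZED`). The function name, the `known_datasets`/`authorized_ids` inputs and the plain-pandas setting are illustrative assumptions; the real `check_datasets` operates on the import's transient dataframe and GeoNature's permission model.

```python
import uuid

import pandas as pd


def sketch_check_datasets(df, known_datasets, authorized_ids, uuid_col="src_unique_dataset_id"):
    """Resolve per-row dataset UUIDs to ids, collecting (error_code, row_index) pairs.

    known_datasets: {uuid_hex: id_dataset} for datasets existing in the database.
    authorized_ids: set of id_dataset the importing user may write into.
    """
    errors, resolved = [], []
    for idx, raw in df[uuid_col].items():
        if pd.isna(raw) or raw == "":
            resolved.append(None)  # empty cell: the dataset selected for the import applies
            continue
        try:
            key = uuid.UUID(str(raw)).hex
        except ValueError:
            errors.append(("INVALID_UUID", idx))
            resolved.append(None)
            continue
        if key not in known_datasets:
            errors.append(("DATASET_NOT_FOUND", idx))
            resolved.append(None)
        elif known_datasets[key] not in authorized_ids:
            errors.append(("DATASET_NOT_AUTHORIZED", idx))
            resolved.append(None)
        else:
            resolved.append(known_datasets[key])
    return df.assign(id_dataset=resolved), errors
```

This also illustrates why the `joinedload` added above matters: with `raiseload("*")` on the query, any relationship the check later touches (here `nomenclature_data_origin`) must be eagerly loaded up front.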
diff --git a/backend/geonature/migrations/versions/imports/6e1852ecfea2_add_column_unique_dataset_id_to_t_.py b/backend/geonature/migrations/versions/imports/6e1852ecfea2_add_column_unique_dataset_id_to_t_.py
new file mode 100644
index 0000000000..c712869ea0
--- /dev/null
+++ b/backend/geonature/migrations/versions/imports/6e1852ecfea2_add_column_unique_dataset_id_to_t_.py
@@ -0,0 +1,219 @@
+"""add column unique_dataset_id to t_imports_synthese and insert into bib_fields and cor_entity_field
+
+Revision ID: 6e1852ecfea2
+Revises: 8b149244d586
+Create Date: 2024-03-04 12:31:00.861460
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.schema import Table, MetaData
+
+# revision identifiers, used by Alembic.
+revision = "6e1852ecfea2"
+down_revision = "8b149244d586"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    meta = MetaData(bind=op.get_bind())
+
+    # Add columns to t_imports_synthese table
+    with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
+        batch_op.add_column(sa.Column("src_unique_dataset_id", sa.String))
+        batch_op.add_column(sa.Column("unique_dataset_id", UUID(as_uuid=True)))
+        batch_op.add_column(sa.Column("id_dataset", sa.Integer))
+
+    # Fetch id_destination for 'synthese' from bib_destinations table
+    destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
+    id_dest_synthese = (
+        op.get_bind()
+        .execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
+        .scalar()
+    )
+
+    # Fetch id_entity_observation for id_destination from bib_entities table
+    entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
+    id_entity_observation = (
+        op.get_bind()
+        .execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
+        .scalar()
+    )
+
+    # Fetch id_theme_general from bib_themes table
+    theme = Table("bib_themes", meta, autoload=True, schema="gn_imports")
+    id_theme_general = (
+        op.get_bind()
+        .execute(sa.select([theme.c.id_theme]).where(theme.c.name_theme == "general_info"))
+        .scalar()
+    )
+
+    # Define the two new fields to insert into bib_fields, each paired with its
+    # cor_entity_field properties, keyed by entity
+    field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
+    list_field_to_insert = [
+        (
+            {
+                "name_field": "unique_dataset_id",
+                "fr_label": "Identifiant JDD (UUID)",
+                "mandatory": False,
+                "autogenerated": False,
+                "display": True,
+                "mnemonique": None,
+                "source_field": "src_unique_dataset_id",
+                "dest_field": "unique_dataset_id",
+            },
+            {
+                id_entity_observation: {
+                    "id_theme": id_theme_general,
+                    "order_field": 3,
+                    "comment": "Correspondance champs standard: metadonneeId ou jddMetaId",
+                },
+            },
+        ),
+        (
+            {
+                "name_field": "id_dataset",
+                "fr_label": "Identifiant JDD",
+                "mandatory": False,
+                "autogenerated": False,
+                "display": False,
+                "mnemonique": None,
+                "source_field": None,
+                "dest_field": "id_dataset",
+            },
+            {
+                id_entity_observation: {
+                    "id_theme": id_theme_general,
+                    "order_field": 3,
+                    "comment": "",
+                },
+            },
+        ),
+    ]
+
+    # Insert the fields into bib_fields and keep the generated ids
+    id_fields = [
+        id_field
+        for id_field, in op.get_bind()
+        .execute(
+            sa.insert(field)
+            .values(
+                [{"id_destination": id_dest_synthese, **field} for field, _ in list_field_to_insert]
+            )
+            .returning(field.c.id_field)
+        )
+        .fetchall()
+    ]
+
+    # Insert data into cor_entity_field table
+    cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
+    op.execute(
+        sa.insert(cor_entity_field).values(
+            [
+                {"id_entity": id_entity, "id_field": id_field, **props}
+                for id_field, field_entities in zip(id_fields, list_field_to_insert)
+                for id_entity, props in field_entities[1].items()
+            ]
+        )
+    )
+
+    # Add unique_dataset_id to the "Synthese GeoNature" field mapping
+    t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")
+
+    id_t_mapping_synthese = (
+        op.get_bind()
+        .execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
+        .scalar()
+    )
+
+    update_query = sa.text(
+        """
+        UPDATE gn_imports.t_fieldmappings
+        SET values = values::jsonb || '{"unique_dataset_id": "unique_dataset_id"}'::jsonb
+        WHERE id = :id_t_mapping_synthese
+        """
+    )
+
+    op.get_bind().execute(update_query, id_t_mapping_synthese=id_t_mapping_synthese)
+
+
+def downgrade():
+    meta = MetaData(bind=op.get_bind())
+
+    # Drop columns from t_imports_synthese table
+    with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
+        batch_op.drop_column("unique_dataset_id")
+        batch_op.drop_column("src_unique_dataset_id")
+        batch_op.drop_column("id_dataset")
+
+    # Fetch id_destination for 'synthese' from bib_destinations table
+    destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
+    id_dest_synthese = (
+        op.get_bind()
+        .execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
+        .scalar()
+    )
+
+    # Fetch id_entity_observation for id_destination from bib_entities table
+    entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
+    id_entity_observation = (
+        op.get_bind()
+        .execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
+        .scalar()
+    )
+
+    # Fetch the ids of the fields inserted by upgrade() into bib_fields
+    field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
+    id_fields = (
+        op.get_bind()
+        .execute(
+            sa.select([field.c.id_field]).where(
+                sa.or_(
+                    sa.and_(
+                        field.c.name_field == "unique_dataset_id",
+                        field.c.id_destination == id_dest_synthese,
+                    ),
+                    sa.and_(
+                        field.c.name_field == "id_dataset",
+                        field.c.id_destination == id_dest_synthese,
+                    ),
+                )
+            )
+        )
+        .scalars()
+        .all()
+    )
+
+    # Delete rows from cor_entity_field based on matching list of id_fields
+    cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
+    op.execute(
+        cor_entity_field.delete().where(
+            sa.and_(
+                cor_entity_field.c.id_entity == id_entity_observation,
+                cor_entity_field.c.id_field.in_(id_fields),
+            )
+        )
+    )
+
+    op.execute(field.delete().where(field.c.id_field.in_(id_fields)))
+
+    # Remove unique_dataset_id from the "Synthese GeoNature" field mapping
+    t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")
+
+    id_t_mapping_synthese = (
+        op.get_bind()
+        .execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
+        .scalar()
+    )
+
+    revert_query = sa.text(
+        """
+        UPDATE gn_imports.t_fieldmappings
+        SET values = values::jsonb - 'unique_dataset_id'
+        WHERE id = :id_t_mapping_synthese
+        """
+    )
+
+    op.get_bind().execute(revert_query, id_t_mapping_synthese=id_t_mapping_synthese)
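The migration leans throughout on a reflect-then-insert pattern: tables are autoloaded from the live schema and new `bib_fields` rows are inserted with `RETURNING` so that `cor_entity_field` can reference their generated ids. A condensed sketch of that pattern, using the same legacy `sa.select([...])` style as the migration; the helper name is hypothetical and it only runs inside an Alembic `upgrade()`/`downgrade()` where `op.get_bind()` is available:

```python
from alembic import op
import sqlalchemy as sa
from sqlalchemy.schema import MetaData, Table


def insert_bib_field(values: dict) -> int:
    """Insert one row into gn_imports.bib_fields and return its generated id_field."""
    bind = op.get_bind()
    # Reflect the table from the live database instead of redeclaring its model
    field = Table("bib_fields", MetaData(bind=bind), autoload=True, schema="gn_imports")
    return bind.execute(
        sa.insert(field).values(**values).returning(field.c.id_field)
    ).scalar()
```

Reflection keeps the migration decoupled from the current ORM models, which may have drifted since the revision was written; that is the usual reason Alembic revisions avoid importing application models.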
diff --git a/backend/geonature/tests/imports/files/synthese/jdd_to_import_file.csv b/backend/geonature/tests/imports/files/synthese/jdd_to_import_file.csv
new file mode 100644
index 0000000000..df362043f2
--- /dev/null
+++ b/backend/geonature/tests/imports/files/synthese/jdd_to_import_file.csv
@@ -0,0 +1,5 @@
+error;id_synthese;id_origine;comment_releve;comment_occurrence;date_debut;date_fin;heure_debut;heure_fin;cd_nom;cd_ref;nom_valide;nom_vernaculaire;nom_cite;regne;group1_inpn;group2_inpn;classe;ordre;famille;rang_taxo;nombre_min;nombre_max;alti_min;alti_max;prof_min;prof_max;observateurs;determinateur;communes;geometrie_wkt_4326;x_centroid_4326;y_centroid_4326;nom_lieu;validateur;niveau_validation;date_validation;comment_validation;preuve_numerique_url;preuve_non_numerique;jdd_nom;jdd_uuid;jdd_id;ca_nom;ca_uuid;ca_id;cd_habref;cd_habitat;nom_habitat;precision_geographique;nature_objet_geo;type_regroupement;methode_regroupement;technique_observation;biologique_statut;etat_biologique;biogeographique_statut;naturalite;preuve_existante;niveau_precision_diffusion;stade_vie;sexe;objet_denombrement;type_denombrement;niveau_sensibilite;statut_observation;floutage_dee;statut_source;type_info_geo;methode_determination;comportement;reference_biblio;uuid_perm_sinp;uuid_perm_grp_sinp;date_creation;date_modification;unique_dataset_id
+valid;1;1;Relevé n°1;Occurrence n°1;2017-01-01;2017-01-01;12:05:02;12:05:02;60612;60612;Lynx lynx (Linnaeus, 1758);;Lynx Boréal;Animalia;Chordés;Mammifères;Mammalia;Carnivora;Felidae;ES;5;5;1500;1565;;;Administrateur test;Gil;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poil;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Adulte;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;b4f85a2e-dd88-4cdd-aa86-f1c7370faf3f;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;VALID_DATASET_UUID
+valid;2;2;Relevé n°2;Occurrence n°2;2017-01-01;2017-01-02;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1500;1565;;;Administrateur test;Théo;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Immature;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;830c93c7-288e-40f0-a17f-15fbb50e643a;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;
+DATASET_NOT_AUTHORIZED(unique_dataset_id);3;3;Relevé n°3;Occurrence n°3;2017-01-08;;;;67111;67111;Alburnus alburnus (Linnaeus, 1758);Ablette;Ablette;Animalia;Chordés;Poissons;Actinopterygii;Cypriniformes;Leuciscidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;2f92f91a-64a2-4684-90e4-140466bb34e3;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;FORBIDDEN_DATASET_UUID
+INVALID_UUID(unique_dataset_id);6;6;Relevé n°6;Occurrence n°6;2017-01-01;2017-01-01;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;f5515e2a-b30d-11eb-8cc8-af8c2d0867b4;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;050d613c-543f-47fd-800a-13931b2721c7
diff --git a/backend/geonature/tests/imports/jsonschema_definitions.py b/backend/geonature/tests/imports/jsonschema_definitions.py
index ee161519de..017146dce0 100644
--- a/backend/geonature/tests/imports/jsonschema_definitions.py
+++ b/backend/geonature/tests/imports/jsonschema_definitions.py
@@ -24,10 +24,16 @@
             "type": "string",
         },
         "eng_label": {
-            "type": "string",
+            "type": [
+                "string",
+                "null",
+            ],
         },
         "desc_field": {
-            "type": "string",
+            "type": [
+                "string",
+                "null",
+            ],
         },
         "type_field": {
             "type": "string",
diff --git a/backend/geonature/tests/imports/test_imports_synthese.py b/backend/geonature/tests/imports/test_imports_synthese.py
index f080cd2649..a202b4e596 100644
--- a/backend/geonature/tests/imports/test_imports_synthese.py
+++ b/backend/geonature/tests/imports/test_imports_synthese.py
@@ -1,4 +1,4 @@
-from io import StringIO
+from io import StringIO, BytesIO
 from pathlib import Path
 from functools import partial
 from operator import or_
@@ -143,11 +143,24 @@ def new_import(synthese_destination, users, import_dataset):
 
 
 @pytest.fixture()
-def uploaded_import(new_import, import_file_name):
+def uploaded_import(new_import, datasets, import_file_name):
     with db.session.begin_nested():
         with open(tests_path / "files" / "synthese" / import_file_name, "rb") as f:
-            new_import.source_file = f.read()
-            new_import.full_file_name = "valid_file.csv"
+            f.seek(0)
+            content = f.read()
+        if import_file_name == "jdd_to_import_file.csv":
+            content = content.replace(
+                b"VALID_DATASET_UUID",
+                datasets["own_dataset"].unique_dataset_id.hex.encode("ascii"),
+            )
+            content = content.replace(
+                b"FORBIDDEN_DATASET_UUID",
+                datasets["orphan_dataset"].unique_dataset_id.hex.encode("ascii"),
+            )
+            new_import.full_file_name = "jdd_to_import_file.csv"
+        else:
+            new_import.full_file_name = "valid_file.csv"
+        new_import.source_file = content
     return new_import
 
 
@@ -174,7 +187,7 @@ def decoded_import(client, uploaded_import):
 
 @pytest.fixture()
 def fieldmapping(import_file_name, autogenerate):
-    if import_file_name == "valid_file.csv":
+    if import_file_name in ["valid_file.csv", "jdd_to_import_file.csv"]:
         return FieldMapping.query.filter_by(label="Synthese GeoNature").one().values
     else:
         return {
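The `uploaded_import` fixture above relies on a small substitution trick: the test CSV ships with symbolic placeholders that are bound to real fixture datasets at upload time, so the file never hardcodes database-dependent UUIDs. Factored out, the idea looks roughly like this (the function name and the mapping shape are ours, not part of the test suite):

```python
import uuid


def bind_dataset_placeholders(content: bytes, mapping: dict) -> bytes:
    """Replace symbolic markers such as b"VALID_DATASET_UUID" with real UUID hex strings."""
    for placeholder, dataset_uuid in mapping.items():
        content = content.replace(placeholder, dataset_uuid.hex.encode("ascii"))
    return content


# e.g. bind_dataset_placeholders(raw_csv, {b"VALID_DATASET_UUID": uuid.uuid4()})
```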
@@ -1271,3 +1284,15 @@ def test_import_compare_error_line_with_csv(self, users, imported_import, import
         assert int(source_row["line_number"]) == erroneous_line_number
         # and this is the test purpose assert:
         assert error_row == source_row
+
+    @pytest.mark.parametrize("import_file_name", ["jdd_to_import_file.csv"])
+    def test_import_jdd_file(self, imported_import):
+        assert_import_errors(
+            imported_import,
+            {
+                # id_dataset errors
+                # Line 2 should not be an error (it should default to the JDD selected for the import)
+                ("DATASET_NOT_AUTHORIZED", "unique_dataset_id", frozenset({2, 4})),
+                ("DATASET_NOT_FOUND", "unique_dataset_id", frozenset({5})),
+            },
+        )
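As a reading aid for the expected-error set, each tuple is `(error_code, offending_column, frozenset_of_line_numbers)`; judging from `test_import_compare_error_line_with_csv` above, which matches error line numbers against the uploaded CSV, the numbers are 1-based file lines with the header as line 1:

```python
# Row-by-row reading of jdd_to_import_file.csv against the expected set
# (an interpretation of the test data, not code from the suite):
#   line 2: VALID_DATASET_UUID placeholder (own_dataset), currently reported
#           DATASET_NOT_AUTHORIZED; the in-test comment flags this as questionable
#   line 3: empty unique_dataset_id, falls back to the import's dataset, no error
#   line 4: FORBIDDEN_DATASET_UUID placeholder (orphan_dataset), DATASET_NOT_AUTHORIZED
#   line 5: well-formed but unknown UUID, DATASET_NOT_FOUND
expected = {
    ("DATASET_NOT_AUTHORIZED", "unique_dataset_id", frozenset({2, 4})),
    ("DATASET_NOT_FOUND", "unique_dataset_id", frozenset({5})),
}
```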