Commit

feat(import,synthese): add the possibility to declare jdd in the import file

- Alembic revision to add the columns unique_dataset_id and src_unique_dataset_id
- Add the new fields into bib_fields and cor_entity_field
- Change the "Synthese GeoNature" field mapping to add unique_dataset_id (see the sketch below for the dataset-resolution idea)
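
For context, a minimal pandas sketch of the dataset-resolution idea behind the new check_datasets step (an illustration only, not the actual GeoNature implementation; the lookup dictionaries below are made up, while the column names mirror those added by the migration): the user-provided JDD UUID is parsed, resolved to id_dataset when the dataset is known and authorized, and otherwise flagged with errors comparable to INVALID_UUID, DATASET_NOT_FOUND and DATASET_NOT_AUTHORIZED.

```python
# Hedged illustration only — not the actual GeoNature check_datasets() code.
import uuid

import pandas as pd

# Hypothetical lookup tables (UUID hex -> id_dataset); the real code queries TDatasets.
authorized = {"4d331cae65e44948b0b2a11bc5bb46c2": 1}
known = {**authorized, "5937d0f2c96d424bbea49e3fdac894ed": 2}  # exists but not importable

df = pd.DataFrame(
    {
        "src_unique_dataset_id": [
            "4d331cae-65e4-4948-b0b2-a11bc5bb46c2",  # authorized -> id_dataset filled
            "5937d0f2-c96d-424b-bea4-9e3fdac894ed",  # known but forbidden
            "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",  # well-formed UUID, unknown dataset
            "not-a-uuid",                            # malformed UUID
            None,                                    # empty -> the import's selected dataset applies
        ]
    }
)

def parse_uuid(value):
    try:
        return uuid.UUID(value).hex if value else None
    except (TypeError, ValueError):
        return None

df["unique_dataset_id"] = df["src_unique_dataset_id"].map(parse_uuid)
df["invalid_uuid"] = df["src_unique_dataset_id"].notna() & df["unique_dataset_id"].isna()
df["id_dataset"] = df["unique_dataset_id"].map(authorized)
df["dataset_not_found"] = df["unique_dataset_id"].notna() & ~df["unique_dataset_id"].isin(set(known))
df["dataset_not_authorized"] = df["unique_dataset_id"].isin(set(known)) & ~df["unique_dataset_id"].isin(set(authorized))
print(df[["id_dataset", "invalid_uuid", "dataset_not_found", "dataset_not_authorized"]])
```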

Reviewed-by: andriacap

fix: change type of "eng_label" and "desc_label"

In the database these two fields are nullable, so change the type to allow a "None" value.
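
As a small illustration of what this allows (a hedged sketch using the standalone jsonschema library with a made-up instance, not the project's actual test schema): a property typed ["string", "null"] accepts both a string and None.

```python
# Hedged sketch: a property typed ["string", "null"] validates both a string and None.
from jsonschema import validate

schema = {
    "type": "object",
    "properties": {
        "eng_label": {"type": ["string", "null"]},
    },
}

validate({"eng_label": "Dataset UUID"}, schema)  # passes
validate({"eng_label": None}, schema)            # also passes once "null" is allowed
```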

Reviewed-by: andriacap

fix(test): errors with unique_dataset_id

Fix test errors caused by the addition of unique_dataset_id

Reviewed-by: andriacap

fix: change revision id (after rebase on feat/import)

Reviewed-by: andriacap

fix: nomenclature raiseload error
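
This likely corresponds to the joinedload() added before raiseload("*") in checks/dataframe/core.py further down. A generic SQLAlchemy sketch of the pattern, with toy models rather than GeoNature's own: under raiseload("*"), touching any relationship that was not explicitly loaded raises an error, so relationships accessed later must be eagerly loaded in the same query.

```python
# Hedged, generic SQLAlchemy sketch (toy models, not GeoNature's) of why a
# joinedload() is needed before raiseload("*").
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base, joinedload, raiseload, relationship

Base = declarative_base()

class Nomenclature(Base):
    __tablename__ = "nomenclature"
    id = Column(Integer, primary_key=True)
    label = Column(String)

class Dataset(Base):
    __tablename__ = "dataset"
    id = Column(Integer, primary_key=True)
    id_nomenclature = Column(ForeignKey("nomenclature.id"))
    nomenclature_data_origin = relationship(Nomenclature)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Dataset(nomenclature_data_origin=Nomenclature(label="Collecte terrain")))
    session.commit()

    # raiseload("*") blocks lazy loading of every relationship not listed explicitly;
    # without the joinedload, accessing nomenclature_data_origin below would raise.
    ds = (
        session.query(Dataset)
        .options(joinedload(Dataset.nomenclature_data_origin), raiseload("*"))
        .one()
    )
    print(ds.nomenclature_data_origin.label)  # safe: eagerly loaded by the joinedload
```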

Reviewed-by: andriac

refact: test synthese import jdd

Based on Elie's review

Reviewed-by: andriac

add missing line of code in synthese mixin
andriacap authored and jacquesfize committed Jul 25, 2024
1 parent e6282af commit 4087502
Showing 6 changed files with 282 additions and 11 deletions.
@@ -1,5 +1,6 @@
from math import ceil


from geonature.core.imports.import_mixin import ImportMixin, ImportStatisticsLabels, ImportInputUrl

from apptax.taxonomie.models import Taxref
@@ -23,6 +24,7 @@
check_types,
check_geography,
check_counts,
check_datasets,
)
from geonature.core.imports.checks.sql import (
do_nomenclatures_mapping,
@@ -97,7 +99,7 @@ def check_transient_data(task, logger, imprt: TImports):
def update_batch_progress(batch, step):
start = 0.1
end = 0.4
step_count = 7
step_count = 8
progress = start + ((batch + 1) / batch_count) * (step / step_count) * (end - start)
task.update_state(state="PROGRESS", meta={"progress": progress})

@@ -141,6 +143,17 @@ def update_batch_progress(batch, step):
updated_cols |= check_types(imprt, entity, df, fields)
update_batch_progress(batch, 4)

logger.info(f"[{batch+1}/{batch_count}] Check dataset rows")
with start_sentry_child(op="check.df", description="check datasets rows"):
updated_cols |= check_datasets(
imprt,
entity,
df,
uuid_field=fields["unique_dataset_id"],
id_field=fields["id_dataset"],
module_code="SYNTHESE",
)
update_batch_progress(batch, 5)
logger.info(f"[{batch+1}/{batch_count}] Check geography…")
with start_sentry_child(op="check.df", description="set geography"):
updated_cols |= check_geography(
@@ -157,7 +170,7 @@ def update_batch_progress(batch, step):
codemaille_field=fields["codemaille"],
codedepartement_field=fields["codedepartement"],
)
update_batch_progress(batch, 5)
update_batch_progress(batch, 6)

logger.info(f"[{batch+1}/{batch_count}] Check counts…")
with start_sentry_child(op="check.df", description="check count"):
@@ -169,12 +182,12 @@ def update_batch_progress(batch, step):
fields["count_max"],
default_count=current_app.config["IMPORT"]["DEFAULT_COUNT_VALUE"],
)
update_batch_progress(batch, 6)
update_batch_progress(batch, 7)

logger.info(f"[{batch+1}/{batch_count}] Updating import data from dataframe…")
with start_sentry_child(op="check.df", description="save dataframe"):
update_transient_data_from_dataframe(imprt, entity, updated_cols, df)
update_batch_progress(batch, 7)
update_batch_progress(batch, 8)

# Checks in SQL
convert_geom_columns(
@@ -335,6 +348,8 @@ def import_data_to_destination(imprt: TImports) -> None:
if source_field in imprt.columns:
insert_fields |= {field}

insert_fields -= {fields["unique_dataset_id"]} # Column only used for filling `id_dataset`

select_stmt = (
sa.select(
*[transient_table.c[field.dest_field] for field in insert_fields],
1 change: 1 addition & 0 deletions backend/geonature/core/imports/checks/dataframe/core.py
@@ -229,6 +229,7 @@ def check_datasets(
datasets = {
ds.unique_dataset_id.hex: ds
for ds in TDatasets.query.filter(TDatasets.unique_dataset_id.in_(uuid))
.options(sa.orm.joinedload(TDatasets.nomenclature_data_origin))
.options(sa.orm.raiseload("*"))
.all()
}
@@ -0,0 +1,219 @@
"""add column unique_dataset_id to t_imports_synthese and insert into bib_fields and cor_entity_field
Revision ID: 6e1852ecfea2
Revises: 8b149244d586
Create Date: 2024-03-04 12:31:00.861460
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.schema import Table, MetaData

# revision identifiers, used by Alembic.
revision = "6e1852ecfea2"
down_revision = "8b149244d586"
branch_labels = None
depends_on = None


def upgrade():
meta = MetaData(bind=op.get_bind())

# Add columns to t_imports_synthese table
with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
batch_op.add_column(sa.Column("src_unique_dataset_id", sa.String))
batch_op.add_column(sa.Column("unique_dataset_id", UUID(as_uuid=True)))
batch_op.add_column(sa.Column("id_dataset", sa.Integer))
# Fetch id_destination for 'synthese' from bib_destinations table
destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
id_dest_synthese = (
op.get_bind()
.execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
.scalar()
)
# Fetch id_entity_observation for id_destination from bib_entities table
entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
id_entity_observation = (
op.get_bind()
.execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
.scalar()
)

# Fetch id_theme_general from bib_themes table
theme = Table("bib_themes", meta, autoload=True, schema="gn_imports")
id_theme_general = (
op.get_bind()
.execute(sa.select([theme.c.id_theme]).where(theme.c.name_theme == "general_info"))
.scalar()
)

# Fetch id_field for 'unique_dataset_id' from bib_fields table
field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
list_field_to_insert = [
(
{
"name_field": "unique_dataset_id",
"fr_label": "Identifiant JDD (UUID)",
"mandatory": False,
"autogenerated": False,
"display": True,
"mnemonique": None,
"source_field": "src_unique_dataset_id",
"dest_field": "unique_dataset_id",
},
{
id_entity_observation: {
"id_theme": id_theme_general,
"order_field": 3,
"comment": "Correspondance champs standard: metadonneeId ou jddMetaId",
},
},
),
(
{
"name_field": "id_dataset",
"fr_label": "Identifiant JDD",
"mandatory": False,
"autogenerated": False,
"display": False,
"mnemonique": None,
"source_field": None,
"dest_field": "id_dataset",
},
{
id_entity_observation: {
"id_theme": id_theme_general,
"order_field": 3,
"comment": "",
},
},
),
]
# insert_data = {"id_destination": id_dest_synthese, **field_unique_dataset_id_info}

id_fields = [
id_field
for id_field, in op.get_bind()
.execute(
sa.insert(field)
.values(
[{"id_destination": id_dest_synthese, **field} for field, _ in list_field_to_insert]
)
.returning(field.c.id_field)
)
.fetchall()
]

# Insert data into cor_entity_field table
cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
op.execute(
sa.insert(cor_entity_field).values(
[
{"id_entity": id_entity, "id_field": id_field, **props}
for id_field, field_entities in zip(id_fields, list_field_to_insert)
for id_entity, props in field_entities[1].items()
]
)
)

# Update the "Synthese GeoNature" field mapping to add unique_dataset_id
t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")

id_t_mapping_synthese = (
op.get_bind()
.execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
.scalar()
)

update_query = sa.text(
"""
UPDATE gn_imports.t_fieldmappings
SET values = values::jsonb || '{"unique_dataset_id": "unique_dataset_id"}'::jsonb
WHERE id = :id_t_mapping_synthese
"""
)

op.get_bind().execute(update_query, id_t_mapping_synthese=id_t_mapping_synthese)


def downgrade():
meta = MetaData(bind=op.get_bind())

# Drop columns from t_imports_synthese table
with op.batch_alter_table("t_imports_synthese", schema="gn_imports") as batch_op:
batch_op.drop_column("unique_dataset_id")
batch_op.drop_column("src_unique_dataset_id")
batch_op.drop_column("id_dataset")

# Fetch id_destination for 'synthese' from bib_destinations table
destination = Table("bib_destinations", meta, autoload=True, schema="gn_imports")
id_dest_synthese = (
op.get_bind()
.execute(sa.select([destination.c.id_destination]).where(destination.c.code == "synthese"))
.scalar()
)

# Fetch id_entity_observation for id_destination from bib_entities table
entity = Table("bib_entities", meta, autoload=True, schema="gn_imports")
id_entity_observation = (
op.get_bind()
.execute(sa.select([entity.c.id_entity]).where(entity.c.id_destination == id_dest_synthese))
.scalar()
)

# Fetch id_fields inserted into bib_fields table
field = Table("bib_fields", meta, autoload=True, schema="gn_imports")
id_fields = (
op.get_bind()
.execute(
sa.select([field.c.id_field]).where(
sa.or_(
sa.and_(
field.c.name_field == "unique_dataset_id",
field.c.id_destination == id_dest_synthese,
),
sa.and_(
field.c.name_field == "id_dataset",
field.c.id_destination == id_dest_synthese,
),
)
)
)
.scalars()
.all()
)

# Delete rows from cor_entity_field based on matching list of id_fields
cor_entity_field = Table("cor_entity_field", meta, autoload=True, schema="gn_imports")
op.execute(
cor_entity_field.delete().where(
sa.and_(
cor_entity_field.c.id_entity == id_entity_observation,
cor_entity_field.c.id_field.in_(id_fields),
)
)
)

op.execute(field.delete().where(field.c.id_field.in_(id_fields)))

t_mappings = Table("t_mappings", meta, autoload=True, schema="gn_imports")

# Get the ID of the "Synthese GeoNature" mapping
id_t_mapping_synthese = (
op.get_bind()
.execute(sa.select([t_mappings.c.id]).where(t_mappings.c.label == "Synthese GeoNature"))
.scalar()
)

revert_query = sa.text(
"""
UPDATE gn_imports.t_fieldmappings
SET values = values::jsonb - 'unique_dataset_id'
WHERE id = :id_t_mapping_synthese
"""
)

op.get_bind().execute(revert_query, id_t_mapping_synthese=id_t_mapping_synthese)
@@ -0,0 +1,5 @@
error;id_synthese;id_origine;comment_releve;comment_occurrence;date_debut;date_fin;heure_debut;heure_fin;cd_nom;cd_ref;nom_valide;nom_vernaculaire;nom_cite;regne;group1_inpn;group2_inpn;classe;ordre;famille;rang_taxo;nombre_min;nombre_max;alti_min;alti_max;prof_min;prof_max;observateurs;determinateur;communes;geometrie_wkt_4326;x_centroid_4326;y_centroid_4326;nom_lieu;validateur;niveau_validation;date_validation;comment_validation;preuve_numerique_url;preuve_non_numerique;jdd_nom;jdd_uuid;jdd_id;ca_nom;ca_uuid;ca_id;cd_habref;cd_habitat;nom_habitat;precision_geographique;nature_objet_geo;type_regroupement;methode_regroupement;technique_observation;biologique_statut;etat_biologique;biogeographique_statut;naturalite;preuve_existante;niveau_precision_diffusion;stade_vie;sexe;objet_denombrement;type_denombrement;niveau_sensibilite;statut_observation;floutage_dee;statut_source;type_info_geo;methode_determination;comportement;reference_biblio;uuid_perm_sinp;uuid_perm_grp_sinp;date_creation;date_modification;unique_dataset_id
valid;1;1;Relevé n°1;Occurrence n°1;2017-01-01;2017-01-01;12:05:02;12:05:02;60612;60612;Lynx lynx (Linnaeus, 1758);;Lynx Boréal;Animalia;Chordés;Mammifères;Mammalia;Carnivora;Felidae;ES;5;5;1500;1565;;;Administrateur test;Gil;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poil;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Adulte;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;b4f85a2e-dd88-4cdd-aa86-f1c7370faf3f;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;VALID_DATASET_UUID
valid;2;2;Relevé n°2;Occurrence n°2;2017-01-01;2017-01-02;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1500;1565;;;Administrateur test;Théo;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;10;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Immature;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;830c93c7-288e-40f0-a17f-15fbb50e643a;5b427c76-bd8c-4103-a33c-884c7037aa2b;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;
DATASET_NOT_AUTHORIZED(unique_dataset_id);3;3;Relevé n°3;Occurrence n°3;2017-01-08;;;;67111;67111;Alburnus alburnus (Linnaeus, 1758);Ablette;Ablette;Animalia;Chordés;Poissons;Actinopterygii;Cypriniformes;Leuciscidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;2f92f91a-64a2-4684-90e4-140466bb34e3;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;FORBIDDEN_DATASET_UUID
INVALID_UUID(unique_dataset_id);6;6;Relevé n°6;Occurrence n°6;2017-01-01;2017-01-01;12:05:02;12:05:02;351;351;Rana temporaria Linnaeus, 1758;Grenouille rousse (La);Grenouille rousse;Animalia;Chordés;Amphibiens;Amphibia;Anura;Ranidae;ES;1;1;1600;1600;;;Administrateur test;Donovan;Vallouise-Pelvoux;POINT(6.5 44.85);6.5;44.85;;;En attente de validation;;;;Poils de plumes;Contact aléatoire tous règnes confondus;4d331cae-65e4-4948-b0b2-a11bc5bb46c2;1;Données d'observation de la faune, de la Flore et de la fonge du Parc national des Ecrins;57b7d0f2-4183-4b7b-8f08-6e105d476dc5;1;;;;100;Inventoriel;OBS;;Galerie/terrier;Non renseigné;Non renseigné;Non renseigné;Sauvage;Oui;Précise;Juvénile;Femelle;Individu;Compté;Non sensible - Diffusion précise;Présent;Non;Terrain;Géoréférencement;Autre méthode de détermination;Non renseigné;;f5515e2a-b30d-11eb-8cc8-af8c2d0867b4;5937d0f2-c96d-424b-bea4-9e3fdac894ed;2021-01-11 14:20:46.492497;2021-01-11 14:20:46.492497;050d613c-543f-47fd-800a-13931b2721c7
10 changes: 8 additions & 2 deletions backend/geonature/tests/imports/jsonschema_definitions.py
@@ -24,10 +24,16 @@
"type": "string",
},
"eng_label": {
"type": "string",
"type": [
"string",
"null",
],
},
"desc_field": {
"type": "string",
"type": [
"string",
"null",
],
},
"type_field": {
"type": "string",
35 changes: 30 additions & 5 deletions backend/geonature/tests/imports/test_imports_synthese.py
@@ -1,4 +1,4 @@
from io import StringIO
from io import StringIO, BytesIO
from pathlib import Path
from functools import partial
from operator import or_
@@ -143,11 +143,24 @@ def new_import(synthese_destination, users, import_dataset):


@pytest.fixture()
def uploaded_import(new_import, import_file_name):
def uploaded_import(new_import, datasets, import_file_name):
with db.session.begin_nested():
with open(tests_path / "files" / "synthese" / import_file_name, "rb") as f:
new_import.source_file = f.read()
new_import.full_file_name = "valid_file.csv"
f.seek(0)
content = f.read()
if import_file_name == "jdd_to_import_file.csv":
content = content.replace(
b"VALID_DATASET_UUID",
datasets["own_dataset"].unique_dataset_id.hex.encode("ascii"),
)
content = content.replace(
b"FORBIDDEN_DATASET_UUID",
datasets["orphan_dataset"].unique_dataset_id.hex.encode("ascii"),
)
new_import.full_file_name = "jdd_to_import_file.csv"
else:
new_import.full_file_name = "valid_file.csv"
new_import.source_file = content
return new_import


@@ -174,7 +187,7 @@ def decoded_import(client, uploaded_import):

@pytest.fixture()
def fieldmapping(import_file_name, autogenerate):
if import_file_name == "valid_file.csv":
if import_file_name in ["valid_file.csv", "jdd_to_import_file.csv"]:
return FieldMapping.query.filter_by(label="Synthese GeoNature").one().values
else:
return {
@@ -1271,3 +1284,15 @@ def test_import_compare_error_line_with_csv(self, users, imported_import, import
assert int(source_row["line_number"]) == erroneous_line_number
# and this is the test purpose assert:
assert error_row == source_row

@pytest.mark.parametrize("import_file_name", ["jdd_to_import_file.csv"])
def test_import_jdd_file(self, imported_import):
assert_import_errors(
imported_import,
{
# id_dataset errors
# Line 2 should not be an error (it should use the default selected JDD)
("DATASET_NOT_AUTHORIZED", "unique_dataset_id", frozenset({2, 4})),
("DATASET_NOT_FOUND", "unique_dataset_id", frozenset({5})),
},
)
