diff --git a/api/src/api/schemas/extension/schema_fields.py b/api/src/api/schemas/extension/schema_fields.py
index 8431c3ecd..8f4035afa 100644
--- a/api/src/api/schemas/extension/schema_fields.py
+++ b/api/src/api/schemas/extension/schema_fields.py
@@ -1,7 +1,6 @@
 import copy
 import enum
 import typing
-import uuid
 
 from apiflask import fields as original_fields
 from marshmallow import ValidationError
@@ -133,7 +132,12 @@ class UUID(original_fields.UUID, MixinField):
 
     def __init__(self, **kwargs: typing.Any):
         super().__init__(**kwargs)
-        self.metadata["example"] = uuid.uuid4()
+
+        # Set a default value for the UUID if none supplied
+        example_value = kwargs.get("metadata", {}).get(
+            "example", "123e4567-e89b-12d3-a456-426614174000"
+        )
+        self.metadata["example"] = example_value
 
 
 class Date(original_fields.Date, MixinField):
diff --git a/api/src/data_migration/transformation/subtask/transform_opportunity_attachment.py b/api/src/data_migration/transformation/subtask/transform_opportunity_attachment.py
new file mode 100644
index 000000000..82162fb17
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_opportunity_attachment.py
@@ -0,0 +1,144 @@
+import logging
+from typing import Sequence
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.constants.lookup_constants import OpportunityAttachmentType
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import Opportunity, OpportunityAttachment
+from src.db.models.staging.attachment import TsynopsisAttachment
+
+logger = logging.getLogger(__name__)
+
+
+class TransformOpportunityAttachment(AbstractTransformSubTask):
+
+    def transform_records(self) -> None:
+
+        # Fetch staging attachment / our attachment / opportunity groups
+        records = self.fetch_with_opportunity(
+            TsynopsisAttachment,
+            OpportunityAttachment,
+            [TsynopsisAttachment.syn_att_id == OpportunityAttachment.attachment_id],
+        )
+
+        self.process_opportunity_attachment_group(records)
+
+    def process_opportunity_attachment_group(
+        self,
+        records: Sequence[
+            tuple[TsynopsisAttachment, OpportunityAttachment | None, Opportunity | None]
+        ],
+    ) -> None:
+        for source_attachment, target_attachment, opportunity in records:
+            try:
+                self.process_opportunity_attachment(
+                    source_attachment, target_attachment, opportunity
+                )
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
+                )
+                logger.exception(
+                    "Failed to process opportunity attachment",
+                    extra=transform_util.get_log_extra_opportunity_attachment(source_attachment),
+                )
+
+    def process_opportunity_attachment(
+        self,
+        source_attachment: TsynopsisAttachment,
+        target_attachment: OpportunityAttachment | None,
+        opportunity: Opportunity | None,
+    ) -> None:
+
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
+        )
+
+        extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)
+        logger.info("Processing opportunity attachment", extra=extra)
+
+        if source_attachment.is_deleted:
+            # TODO - https://github.com/HHS/simpler-grants-gov/issues/3322
+            # deletes are more complex because of s3
+            # this just handles deleting the DB record at the moment
+            self._handle_delete(
+                source=source_attachment,
+                target=target_attachment,
+                record_type=transform_constants.OPPORTUNITY_ATTACHMENT,
+                extra=extra,
+            )
+
+        elif opportunity is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Opportunity attachment cannot be processed as the opportunity for it does not exist"
+            )
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_attachment is None
+
+            logger.info("Transforming and upserting opportunity attachment", extra=extra)
+
+            transformed_opportunity_attachment = transform_opportunity_attachment(
+                source_attachment, target_attachment
+            )
+
+            # TODO - we'll need to handle more with the s3 files here
+            if is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
+                )
+                self.db_session.add(transformed_opportunity_attachment)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
+                )
+                self.db_session.merge(transformed_opportunity_attachment)
+
+        logger.info("Processed opportunity attachment", extra=extra)
+        source_attachment.transformed_at = self.transform_time
+
+
+def transform_opportunity_attachment(
+    source_attachment: TsynopsisAttachment, incoming_attachment: OpportunityAttachment | None
+) -> OpportunityAttachment:
+
+    log_extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)
+
+    if incoming_attachment is None:
+        logger.info("Creating new opportunity attachment record", extra=log_extra)
+
+    # We always create a new record here and merge it in the calling function
+    # this way if there is any error doing the transformation, we don't modify the existing one.
+    target_attachment = OpportunityAttachment(
+        attachment_id=source_attachment.syn_att_id,
+        opportunity_id=source_attachment.opportunity_id,
+        # TODO - we'll eventually remove attachment type, for now just arbitrarily set the value
+        opportunity_attachment_type=OpportunityAttachmentType.OTHER,
+        # TODO - in https://github.com/HHS/simpler-grants-gov/issues/3322
+        # we'll actually handle the file location logic with s3
+        file_location="TODO",  # TODO - next PR
+        mime_type=source_attachment.mime_type,
+        file_name=source_attachment.file_name,
+        file_description=source_attachment.file_desc,
+        file_size_bytes=source_attachment.file_lob_size,
+        created_by=source_attachment.creator_id,
+        updated_by=source_attachment.last_upd_id,
+        legacy_folder_id=source_attachment.syn_att_folder_id,
+    )
+
+    transform_util.transform_update_create_timestamp(
+        source_attachment, target_attachment, log_extra=log_extra
+    )
+
+    return target_attachment
diff --git a/api/src/data_migration/transformation/transform_constants.py b/api/src/data_migration/transformation/transform_constants.py
index 7bcb1f74a..449cdf5fd 100644
--- a/api/src/data_migration/transformation/transform_constants.py
+++ b/api/src/data_migration/transformation/transform_constants.py
@@ -35,6 +35,7 @@
 FUNDING_CATEGORY = "funding_category"
 FUNDING_INSTRUMENT = "funding_instrument"
 AGENCY = "agency"
+OPPORTUNITY_ATTACHMENT = "opportunity_attachment"
 
 
 class Metrics(StrEnum):
diff --git a/api/src/data_migration/transformation/transform_oracle_data_task.py b/api/src/data_migration/transformation/transform_oracle_data_task.py
index b0aecd485..353e39b14 100644
--- a/api/src/data_migration/transformation/transform_oracle_data_task.py
+++ b/api/src/data_migration/transformation/transform_oracle_data_task.py
@@ -22,6 +22,9 @@
     TransformFundingInstrument,
 )
 from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity
+from src.data_migration.transformation.subtask.transform_opportunity_attachment import (
+    TransformOpportunityAttachment,
+)
 from src.data_migration.transformation.subtask.transform_opportunity_summary import (
     TransformOpportunitySummary,
 )
@@ -42,6 +45,9 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
     enable_funding_category: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
     enable_funding_instrument: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
     enable_agency: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY
+    enable_opportunity_attachment: bool = (
+        False  # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT
+    )
 
 
 class TransformOracleDataTask(Task):
@@ -85,3 +91,6 @@ def run_task(self) -> None:
         if self.transform_config.enable_agency:
             TransformAgency(self).run()
             TransformAgencyHierarchy(self).run()
+
+        if self.transform_config.enable_opportunity_attachment:
+            TransformOpportunityAttachment(self).run()
diff --git a/api/src/data_migration/transformation/transform_util.py b/api/src/data_migration/transformation/transform_util.py
index cdefb5425..5e98e3efe 100644
--- a/api/src/data_migration/transformation/transform_util.py
+++ b/api/src/data_migration/transformation/transform_util.py
@@ -23,6 +23,7 @@
     OpportunityAssistanceListing,
     OpportunitySummary,
 )
+from src.db.models.staging.attachment import TsynopsisAttachment
 from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
 from src.db.models.staging.staging_base import StagingBase
 from src.util import datetime_util
@@ -535,3 +536,11 @@ def get_log_extra_funding_instrument(source_funding_instrument: SourceFundingIns
         "revision_number": getattr(source_funding_instrument, "revision_number", None),
         "table_name": source_funding_instrument.__tablename__,
     }
+
+
+def get_log_extra_opportunity_attachment(source_attachment: TsynopsisAttachment) -> dict:
+    return {
+        "opportunity_id": source_attachment.opportunity_id,
+        "syn_att_id": source_attachment.syn_att_id,
+        "att_revision_number": source_attachment.att_revision_number,
+    }
diff --git a/api/tests/src/data_migration/transformation/conftest.py b/api/tests/src/data_migration/transformation/conftest.py
index b17a38910..47a320b51 100644
--- a/api/tests/src/data_migration/transformation/conftest.py
+++ b/api/tests/src/data_migration/transformation/conftest.py
@@ -14,6 +14,7 @@
     LinkOpportunitySummaryFundingInstrument,
     Opportunity,
     OpportunityAssistanceListing,
+    OpportunityAttachment,
     OpportunitySummary,
 )
 from tests.conftest import BaseTestClass
@@ -330,6 +331,33 @@ def setup_agency(
     return tgroups
 
 
+def setup_opportunity_attachment(
+    create_existing: bool,
+    opportunity: Opportunity,
+    is_delete: bool = False,
+    is_already_processed: bool = False,
+    source_values: dict | None = None,
+):
+    if source_values is None:
+        source_values = {}
+
+    synopsis_attachment = f.StagingTsynopsisAttachmentFactory.create(
+        opportunity=None,
+        opportunity_id=opportunity.opportunity_id,
+        is_deleted=is_delete,
+        already_transformed=is_already_processed,
+        **source_values,
+    )
+
+    if create_existing:
+        f.OpportunityAttachmentFactory.create(
+            attachment_id=synopsis_attachment.syn_att_id,
+            opportunity=opportunity,
+        )
+
+    return synopsis_attachment
+
+
 def validate_matching_fields(
     source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
 ):
@@ -760,3 +788,39 @@ def validate_agency(
     validate_matching_fields(
         tgroup_map, agency.agency_contact_info, agency_contact_field_mapping, expect_values_to_match
     )
+
+
+def validate_opportunity_attachment(
+    db_session,
+    source_attachment,
+    expect_in_db: bool = True,
+    expect_values_to_match: bool = True,
+):
+
+    opportunity_attachment = (
+        db_session.query(OpportunityAttachment)
+        .filter(OpportunityAttachment.attachment_id == source_attachment.syn_att_id)
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert opportunity_attachment is None
+        return
+
+    assert opportunity_attachment is not None
+    validate_matching_fields(
+        source_attachment,
+        opportunity_attachment,
+        [
+            ("syn_att_id", "attachment_id"),
+            ("opportunity_id", "opportunity_id"),
+            ("mime_type", "mime_type"),
+            ("file_name", "file_name"),
+            ("file_desc", "file_description"),
+            ("file_lob_size", "file_size_bytes"),
+            ("creator_id", "created_by"),
+            ("last_upd_id", "updated_by"),
+            ("syn_att_folder_id", "legacy_folder_id"),
+        ],
+        expect_values_to_match,
+    )
diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_attachment.py b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_attachment.py
index 0c9895e89..0a44a3f7d 100644
--- a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_attachment.py
+++ b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_attachment.py
@@ -1,41 +1,117 @@
-from src.db.models.foreign.attachment import TsynopsisAttachment as TsynopsisAttachmentF
-from src.db.models.staging.attachment import TsynopsisAttachment as TsynopsisAttachmentS
-from tests.src.db.models.factories import (
-    ForeignTsynopsisAttachmentFactory,
-    StagingTsynopsisAttachmentFactory,
+import pytest
+
+import tests.src.db.models.factories as f
+from src.data_migration.transformation import transform_constants
+from src.data_migration.transformation.subtask.transform_opportunity_attachment import (
+    TransformOpportunityAttachment,
+)
+from tests.src.data_migration.transformation.conftest import (
+    BaseTransformTestClass,
+    setup_opportunity_attachment,
+    validate_opportunity_attachment,
 )
 
 
-def test_uploading_attachment_staging(db_session, enable_factory_create, tmp_path):
-    att = StagingTsynopsisAttachmentFactory.create(file_lob=b"Testing attachment")
-    db_session.commit()
-    db_session.expire_all()
+class TestTransformOpportunityAttachment(BaseTransformTestClass):
+    @pytest.fixture()
+    def transform_opportunity_attachment(self, transform_oracle_data_task):
+        return TransformOpportunityAttachment(transform_oracle_data_task)
+
+    def test_transform_opportunity_attachment(self, db_session, transform_opportunity_attachment):
+
+        opportunity1 = f.OpportunityFactory.create(opportunity_attachments=[])
+
+        insert1 = setup_opportunity_attachment(create_existing=False, opportunity=opportunity1)
+        insert2 = setup_opportunity_attachment(create_existing=False, opportunity=opportunity1)
+
+        update1 = setup_opportunity_attachment(create_existing=True, opportunity=opportunity1)
+        update2 = setup_opportunity_attachment(create_existing=True, opportunity=opportunity1)
+
+        delete1 = setup_opportunity_attachment(
+            create_existing=True, is_delete=True, opportunity=opportunity1
+        )
+
+        opportunity2 = f.OpportunityFactory.create(opportunity_attachments=[])
+
+        insert3 = setup_opportunity_attachment(create_existing=False, opportunity=opportunity2)
+        update3 = setup_opportunity_attachment(create_existing=True, opportunity=opportunity2)
+        delete2 = setup_opportunity_attachment(
+            create_existing=True, is_delete=True, opportunity=opportunity2
+        )
+
+        already_processed_insert = setup_opportunity_attachment(
+            create_existing=False, opportunity=opportunity2, is_already_processed=True
+        )
+        already_processed_update = setup_opportunity_attachment(
+            create_existing=True, opportunity=opportunity2, is_already_processed=True
+        )
+
+        delete_but_current_missing = setup_opportunity_attachment(
+            create_existing=False, opportunity=opportunity2, is_delete=True
+        )
+
+        transform_opportunity_attachment.run_subtask()
+
+        validate_opportunity_attachment(db_session, insert1)
+        validate_opportunity_attachment(db_session, insert2)
+        validate_opportunity_attachment(db_session, insert3)
+
+        validate_opportunity_attachment(db_session, update1)
+        validate_opportunity_attachment(db_session, update2)
+        validate_opportunity_attachment(db_session, update3)
+
+        validate_opportunity_attachment(db_session, delete1, expect_in_db=False)
+        validate_opportunity_attachment(db_session, delete2, expect_in_db=False)
+
+        validate_opportunity_attachment(db_session, already_processed_insert, expect_in_db=False)
+        validate_opportunity_attachment(
+            db_session, already_processed_update, expect_values_to_match=False
+        )
+
+        validate_opportunity_attachment(db_session, delete_but_current_missing, expect_in_db=False)
+
+        metrics = transform_opportunity_attachment.metrics
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 9
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
 
-    db_att = (
-        db_session.query(TsynopsisAttachmentS)
-        .filter(TsynopsisAttachmentS.opportunity_id == att.opportunity_id)
-        .one_or_none()
-    )
-    temp_file = tmp_path / "out_file.txt"
-    temp_file.write_bytes(db_att.file_lob)
-    file_content = temp_file.read_bytes()
+        db_session.commit()  # commit to end any existing transactions as run_subtask starts a new one
+        transform_opportunity_attachment.run_subtask()
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 9
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
 
-    assert file_content == db_att.file_lob
+    def test_transform_opportunity_attachment_delete_but_current_missing(
+        self, db_session, transform_opportunity_attachment
+    ):
+        opportunity = f.OpportunityFactory.create(opportunity_attachments=[])
+        delete_but_current_missing = setup_opportunity_attachment(
+            create_existing=False, opportunity=opportunity, is_delete=True
+        )
 
+        transform_opportunity_attachment.process_opportunity_attachment(
+            delete_but_current_missing, None, opportunity
+        )
 
-def test_uploading_attachment_foreign(db_session, enable_factory_create, tmp_path):
-    att = ForeignTsynopsisAttachmentFactory.create(file_lob=b"Testing attachment")
-    db_session.commit()
-    db_session.expire_all()
+        validate_opportunity_attachment(db_session, delete_but_current_missing, expect_in_db=False)
+        assert delete_but_current_missing.transformed_at is not None
+        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
 
-    db_att = (
-        db_session.query(TsynopsisAttachmentF)
-        .filter(TsynopsisAttachmentF.opportunity_id == att.opportunity_id)
-        .one_or_none()
-    )
+    def test_transform_opportunity_attachment_no_opportunity(
+        self, db_session, transform_opportunity_attachment
+    ):
+        opportunity = f.OpportunityFactory.create(opportunity_attachments=[])
+        insert = setup_opportunity_attachment(create_existing=False, opportunity=opportunity)
 
-    temp_file = tmp_path / "out_file.txt"
-    temp_file.write_bytes(db_att.file_lob)
-    file_content = temp_file.read_bytes()
+        # Don't pass the opportunity in - as if it wasn't found
+        with pytest.raises(
+            ValueError,
+            match="Opportunity attachment cannot be processed as the opportunity for it does not exist",
+        ):
+            transform_opportunity_attachment.process_opportunity_attachment(insert, None, None)
 
-    assert file_content == db_att.file_lob
+        assert insert.transformed_at is None