Skip to content

Make it clear when study already exported #585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 22 additions & 17 deletions pixl_dcmd/src/pixl_dcmd/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from sqlalchemy.orm.session import Session

from core.db.models import Image, Extract
from sqlalchemy import URL, create_engine, exists
from sqlalchemy import ColumnElement, URL, create_engine, exists
from sqlalchemy.orm import sessionmaker, exc

from core.exceptions import PixlDiscardError
from pixl_dcmd.dicom_helpers import StudyInfo

url = URL.create(
Expand Down Expand Up @@ -126,29 +127,33 @@ def get_unexported_image(
Get an existing, non-exported (for this project) image record from the database
identified by the study UID. If no result is found, retry with querying on
MRN + accession number. If this fails as well, raise a NoResultFound.
If study has already been exported, raise a PixlDiscardError.
"""
try:
existing_image: Image = (
pixl_session.query(Image)
.join(Extract)
.filter(
Extract.slug == project_slug,
Image.study_uid == study_info.study_uid,
Image.exported_at == None, # noqa: E711
)
.one()
existing_image = _query_and_raise_if_exported(
pixl_session,
[Extract.slug == project_slug, Image.study_uid == study_info.study_uid],
)
# If no image is found by study UID, try MRN + accession number
except exc.NoResultFound:
existing_image = (
pixl_session.query(Image)
.join(Extract)
.filter(
existing_image = _query_and_raise_if_exported(
pixl_session,
[
Extract.slug == project_slug,
Image.mrn == study_info.mrn,
Image.accession_number == study_info.accession_number,
Image.exported_at == None, # noqa: E711
)
.one()
],
)
return existing_image


def _query_and_raise_if_exported(
pixl_session: Session, clause_list: list[ColumnElement[bool]]
) -> Image:
existing_image: Image = (
pixl_session.query(Image).join(Extract).filter(*clause_list).one()
)
if existing_image.exported_at is not None:
msg = "Study already exported"
raise PixlDiscardError(msg)
return existing_image
162 changes: 127 additions & 35 deletions pixl_dcmd/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@
from __future__ import annotations

import datetime
from dataclasses import dataclass

import pytest
import sqlalchemy

from core.db.models import Extract, Image
from core.exceptions import PixlDiscardError
from pixl_dcmd._database import (
get_unexported_image,
get_uniq_pseudo_study_uid_and_update_db,
Expand All @@ -26,13 +30,81 @@
from sqlalchemy.orm import Session

STUDY_DATE = datetime.date.fromisoformat("2023-01-01")


@dataclass
class StudyData:
"""Database setup of study data."""

mrn: str
accession_number: str
study_uid: str | None
study_date: datetime.date | sqlalchemy.Date = STUDY_DATE
pseudo_patient_id: str | None = None
pseudo_study_uid: str | None = None
exported_at: datetime.datetime | sqlalchemy.DateTime | None = None


@dataclass
class TestData:
"""All test data, separated so that the input values can be overriden and different to db data."""

db: StudyData
input: StudyInfo


def _create_test_data(study_data: StudyData) -> TestData:
return TestData(
study_data,
StudyInfo(
mrn=study_data.mrn,
accession_number=study_data.accession_number,
study_uid=study_data.study_uid,
),
)


TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm"
TEST_STUDY_INFO = StudyInfo(
mrn="123456", accession_number="abcde", study_uid="1.2.3.4.5"
UNPROCESSED_STUDY = _create_test_data(
StudyData(mrn="123456", accession_number="abcde", study_uid="1.2.3.4.5")
)
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID = StudyInfo(
mrn="234567", accession_number="bcdef", study_uid="2.3.4.5.6"
PSEUDO_IDS_STUDY = _create_test_data(
StudyData(
mrn="234567",
accession_number="bcdef",
study_uid="2.3.4.5.6",
pseudo_patient_id="garbled",
pseudo_study_uid="0.0.0.0.0.0",
)
)

EXPORTED_STUDY = _create_test_data(
StudyData(
mrn="exported_mrn",
accession_number="exported_accession",
study_uid="6.6.6",
pseudo_study_uid="1.1.1",
exported_at=datetime.datetime.fromisoformat("2024-01-01"),
)
)

# For duplicates, the database should have no UIDs
DUPLICATE_ACCESSION_BUT_HAS_UID = _create_test_data(
StudyData(
mrn="duplicate_mrn",
accession_number="duplicate_accession",
study_uid="7.7.7",
)
)

DUPLICATE_ACCESSION_NO_UID = _create_test_data(
StudyData(
mrn="duplicate_mrn",
accession_number="duplicate_accession",
study_uid="8.8.8",
)
)
DUPLICATE_ACCESSION_NO_UID.db.study_uid = None


@pytest.fixture()
Expand All @@ -42,46 +114,45 @@ def rows_for_database_testing(db_session) -> Session:
pytest_pixl.dicom.generate_dicom_dataset.
"""
extract = Extract(slug=TEST_PROJECT_SLUG)

existing_image = Image(
mrn=TEST_STUDY_INFO.mrn,
accession_number=TEST_STUDY_INFO.accession_number,
study_uid=TEST_STUDY_INFO.study_uid,
study_date=STUDY_DATE,
extract=extract,
)

existing_image_with_pseudo_study_uid = Image(
mrn=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
accession_number=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.accession_number,
study_uid=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.study_uid,
study_date=STUDY_DATE,
extract=extract,
# This should be a valid VR UI value!
# https://dicom.nema.org/medical/dicom/current/output/html/part05.html#table_6.2-1
pseudo_study_uid="0.0.0.0.0.0",
pseudo_patient_id=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
)

with db_session:
db_session.add_all(
[extract, existing_image, existing_image_with_pseudo_study_uid]
[
extract,
_image_from_study_data(UNPROCESSED_STUDY.db, extract),
_image_from_study_data(PSEUDO_IDS_STUDY.db, extract),
_image_from_study_data(EXPORTED_STUDY.db, extract),
_image_from_study_data(DUPLICATE_ACCESSION_BUT_HAS_UID.db, extract),
_image_from_study_data(DUPLICATE_ACCESSION_NO_UID.db, extract),
]
)
db_session.commit()

return db_session


def _image_from_study_data(study_data: StudyData, extract: Extract) -> Image:
return Image(
mrn=study_data.mrn,
accession_number=study_data.accession_number,
study_uid=study_data.study_uid,
study_date=study_data.study_date,
pseudo_study_uid=study_data.pseudo_study_uid,
pseudo_patient_id=study_data.pseudo_patient_id,
exported_at=study_data.exported_at,
extract=extract,
)


def test_get_uniq_pseudo_study_uid_and_update_db(rows_for_database_testing, db_session):
"""
GIVEN an existing image that already has a pseudo_study_uid
WHEN we query the database for that image
THEN the function should return the existing pseudo_study_uid.
"""
pseudo_study_uid = get_uniq_pseudo_study_uid_and_update_db(
TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID
TEST_PROJECT_SLUG, PSEUDO_IDS_STUDY.input
)
assert pseudo_study_uid == "0.0.0.0.0.0"
assert pseudo_study_uid == PSEUDO_IDS_STUDY.db.pseudo_study_uid


def test_get_pseudo_patient_id_and_update_db(rows_for_database_testing, db_session):
Expand All @@ -92,13 +163,11 @@ def test_get_pseudo_patient_id_and_update_db(rows_for_database_testing, db_sessi
"""
get_pseudo_patient_id_and_update_db(
TEST_PROJECT_SLUG,
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID,
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
)
result = get_unexported_image(
TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID, db_session
PSEUDO_IDS_STUDY.input,
PSEUDO_IDS_STUDY.input.mrn,
)
assert result.pseudo_patient_id == TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn
result = get_unexported_image(TEST_PROJECT_SLUG, PSEUDO_IDS_STUDY.input, db_session)
assert result.pseudo_patient_id == PSEUDO_IDS_STUDY.db.pseudo_patient_id


def test_get_unexported_image_fallback(rows_for_database_testing, db_session):
Expand All @@ -113,4 +182,27 @@ def test_get_unexported_image_fallback(rows_for_database_testing, db_session):
study_uid="nope",
)
result = get_unexported_image(TEST_PROJECT_SLUG, wrong_uid_info, db_session)
assert result.study_uid == TEST_STUDY_INFO.study_uid
assert result.study_uid == UNPROCESSED_STUDY.input.study_uid


def test_exported_image_throws(rows_for_database_testing, db_session):
"""
GIVEN a database entry with the image exported
WHEN we query for the image
THEN a NoRowFound exception should be thrown
"""
with pytest.raises(PixlDiscardError) as exception:
get_unexported_image(TEST_PROJECT_SLUG, EXPORTED_STUDY.input, db_session)
assert str(exception.value) == "Study already exported"


def test_duplicate_image_throws(rows_for_database_testing, db_session):
"""
GIVEN the database has two entries for the same accession number, one with a study uid
WHEN the input without the study uid is processed
THEN a MultipleResultsFound exception should be thrown
"""
with pytest.raises(sqlalchemy.exc.MultipleResultsFound):
get_unexported_image(
TEST_PROJECT_SLUG, DUPLICATE_ACCESSION_NO_UID.input, db_session
)
4 changes: 2 additions & 2 deletions pixl_dcmd/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
import numpy as np
import pydicom
import pytest
import sqlalchemy
from pytest_check import check
from core.db.models import Image
from core.dicom_tags import (
PrivateDicomTag,
add_private_tag,
create_private_tag,
)
from core.exceptions import PixlDiscardError
from core.project_config import load_project_config, load_tag_operations
from core.project_config.pixl_config_model import load_config_and_validate
from decouple import config
Expand Down Expand Up @@ -251,7 +251,7 @@ def test_image_already_exported_throws(test_project_config, exported_dicom_datas
WHEN the dicom tag scheme is applied
THEN an exception will be thrown as
"""
with pytest.raises(sqlalchemy.exc.NoResultFound):
with pytest.raises(PixlDiscardError):
anonymise_dicom_and_update_db(
exported_dicom_dataset,
config=test_project_config,
Expand Down