Skip to content

Fix RNA uploads/delivery message #4145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 22 additions & 77 deletions cg/meta/upload/scout/uploadscoutapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

from housekeeper.store.models import File, Version
from pydantic.dataclasses import dataclass

from cg.apps.housekeeper.hk import HousekeeperAPI
from cg.apps.lims import LimsAPI
Expand All @@ -26,22 +25,13 @@
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.utils.genome_build_helpers import genome_to_scout_format, get_genome_build
from cg.models.scout.scout_load_config import ScoutLoadConfig
from cg.store.api.data_classes import RNADNACollection
from cg.store.models import Analysis, Case, Customer, Sample
from cg.store.store import Store

LOG = logging.getLogger(__name__)


@dataclass
class RNADNACollection:
    """Contains the id for an RNA sample, the name of its connected DNA sample,
    and a list of connected, uploaded DNA cases."""

    # Internal id of the RNA sample this collection describes.
    rna_sample_internal_id: str
    # Name of the DNA sample connected to the RNA sample.
    dna_sample_name: str
    # Ids of the DNA cases connected to that DNA sample.
    dna_case_ids: list[str]


class UploadScoutAPI:
"""Class that handles everything that has to do with uploading to Scout."""

Expand Down Expand Up @@ -182,15 +172,6 @@ def get_rna_omics_outrider(self, case_id: str) -> File | None:
tags: set[str] = {AnalysisTag.OUTRIDER, case_id, AnalysisTag.CLINICAL}
return self.housekeeper.get_file_from_latest_version(bundle_name=case_id, tags=tags)

def get_unique_dna_cases_related_to_rna_case(self, case_id: str) -> set[str]:
    """Collect, without duplicates, the ids of all DNA cases related to an RNA case.

    The RNA case is fetched from StatusDB by its internal id, its RNA/DNA
    collections are created, and the DNA case ids of every collection are
    merged into a single set.
    """
    rna_case: Case = self.status_db.get_case_by_internal_id(case_id)
    collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
    return {
        dna_case_id
        for collection in collections
        for dna_case_id in collection.dna_case_ids
    }

def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
"""Return an RNA alignment CRAM file for a case in Housekeeper."""
tags: set[str] = {AlignmentFileTag.CRAM, sample_id}
Expand All @@ -206,9 +187,11 @@ def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
def upload_rna_alignment_file(self, case_id: str, dry_run: bool) -> None:
"""Upload RNA alignment file to Scout."""
rna_case: Case = self.status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_alignment_cram: File | None = self.get_rna_alignment_cram(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -346,9 +329,11 @@ def upload_rna_coverage_bigwig_to_scout(self, case_id: str, dry_run: bool) -> No

status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_coverage_bigwig: File | None = self.get_rna_coverage_bigwig(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -382,7 +367,7 @@ def upload_omics_sample_id_to_scout(
self, dry_run: bool, rna_dna_collections: list[RNADNACollection]
) -> None:
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
for dna_case_id in rna_dna_collection.dna_case_ids:
LOG.info(
Expand All @@ -406,7 +391,7 @@ def upload_rna_fraser_outrider_to_scout(
"""Upload omics fraser and outrider file for a case to Scout."""
status_db: Store = self.status_db
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_fraser: File | None = self.get_rna_omics_fraser(case_id=case_id)
rna_outrider: File | None = self.get_rna_omics_outrider(case_id=case_id)
Expand Down Expand Up @@ -442,7 +427,7 @@ def upload_rna_fraser_outrider_to_scout(
def upload_rna_genome_build_to_scout(
self,
dry_run: bool,
rna_case: str,
rna_case: Case,
rna_dna_collections: list[RNADNACollection],
) -> None:
"""Upload RNA genome build for an RNA/DNA case to Scout."""
Expand Down Expand Up @@ -502,9 +487,11 @@ def upload_splice_junctions_bed_to_scout(self, dry_run: bool, case_id: str) -> N
status_db: Store = self.status_db
rna_case: Case = status_db.get_case_by_internal_id(case_id)

rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
splice_junctions_bed: File | None = self.get_splice_junctions_bed(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -615,7 +602,9 @@ def upload_rna_omics_to_scout(self, dry_run: bool, case_id: str) -> None:
"""Upload RNA omics files to Scout."""
status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
self.upload_omics_sample_id_to_scout(
dry_run=dry_run, rna_dna_collections=rna_dna_collections
)
Expand Down Expand Up @@ -675,45 +664,6 @@ def get_config_builder(self, analysis, hk_version) -> ScoutConfigBuilder:

return config_builders[analysis.workflow]

def create_rna_dna_collections(self, rna_case: Case) -> list[RNADNACollection]:
    """Create one RNA/DNA collection for the sample of every link on the given RNA case."""
    rna_dna_collections: list[RNADNACollection] = []
    for case_sample_link in rna_case.links:
        rna_dna_collections.append(self.create_rna_dna_collection(case_sample_link.sample))
    return rna_dna_collections

def create_rna_dna_collection(self, rna_sample: Sample) -> RNADNACollection:
    """Build the collection that ties an RNA sample to its single matching DNA sample.

    Samples sharing the RNA sample's subject id and tumour status are fetched
    for all collaborators of the RNA sample's customer and then narrowed down
    with `_get_application_prep_category`. Exactly one sample must remain; its
    name and its related DNA case ids are stored next to the RNA sample id.

    Raises:
        CgDataError: if the RNA sample has no subject id, or if the number of
            matching DNA samples is anything other than one.
    """
    subject_id = rna_sample.subject_id
    if not subject_id:
        raise CgDataError(
            f"Failed to link RNA sample {rna_sample.internal_id} to DNA samples - subject_id field is empty."
        )

    collaborators: set[Customer] = rna_sample.customer.collaborators
    candidate_samples: list[Sample] = (
        self.status_db.get_samples_by_customer_ids_and_subject_id_and_is_tumour(
            customer_ids=[customer.id for customer in collaborators],
            subject_id=subject_id,
            is_tumour=rna_sample.is_tumour,
        )
    )
    matching_dna_samples: list[Sample] = self._get_application_prep_category(candidate_samples)

    # The upload needs an unambiguous RNA -> DNA mapping, so anything but one match is an error.
    number_of_matches: int = len(matching_dna_samples)
    if number_of_matches != 1:
        raise CgDataError(
            f"Failed to upload files for RNA case: unexpected number of DNA sample matches for subject_id: "
            f"{subject_id}. Number of matches: {number_of_matches} "
        )

    dna_sample: Sample = matching_dna_samples[0]
    dna_case_ids: list[str] = self._dna_cases_related_to_dna_sample(
        dna_sample=dna_sample, collaborators=collaborators
    )
    return RNADNACollection(
        rna_sample_internal_id=rna_sample.internal_id,
        dna_sample_name=dna_sample.name,
        dna_case_ids=dna_case_ids,
    )

def _dna_cases_related_to_dna_sample(
self, dna_sample: Sample, collaborators: set[Customer]
) -> list[str]:
Expand Down Expand Up @@ -768,11 +718,6 @@ def _get_application_prep_category(

def get_related_uploaded_dna_cases(self, rna_case_id: str) -> set[str]:
"""Returns all uploaded DNA cases related to the specified RNA case."""
unique_dna_case_ids: set[str] = self.get_unique_dna_cases_related_to_rna_case(rna_case_id)
uploaded_dna_cases: set[str] = set()
for dna_case_id in unique_dna_case_ids:
if self.status_db.get_case_by_internal_id(dna_case_id).is_uploaded:
uploaded_dna_cases.add(dna_case_id)
else:
LOG.warning(f"Related DNA case {dna_case_id} has not been completed.")
return uploaded_dna_cases
rna_case: Case = self.status_db.get_case_by_internal_id(rna_case_id)
dna_cases: list[Case] = self.status_db.get_uploaded_related_dna_cases(rna_case)
return {dna_case.internal_id for dna_case in dna_cases}
11 changes: 11 additions & 0 deletions cg/store/api/data_classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pydantic.dataclasses import dataclass


@dataclass
class RNADNACollection:
    """Contains the id for an RNA sample, the name of its connected DNA sample,
    and a list of connected, uploaded DNA cases."""

    # Internal id of the RNA sample this collection describes.
    rna_sample_id: str
    # Name of the DNA sample connected to the RNA sample.
    dna_sample_name: str
    # Internal ids of the DNA cases connected to that DNA sample.
    dna_case_ids: list[str]
93 changes: 66 additions & 27 deletions cg/store/crud/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
SampleType,
)
from cg.constants.sequencing import DNA_PREP_CATEGORIES, SeqLibraryPrepCategory
from cg.exc import CaseNotFoundError, CgError, OrderNotFoundError, SampleNotFoundError
from cg.exc import CaseNotFoundError, CgDataError, CgError, OrderNotFoundError, SampleNotFoundError
from cg.models.orders.constants import OrderType
from cg.models.orders.sample_base import SexEnum
from cg.server.dto.samples.collaborator_samples_request import CollaboratorSamplesRequest
from cg.services.orders.order_service.models import OrderQueryParams
from cg.store.api.data_classes import RNADNACollection
from cg.store.base import BaseHandler
from cg.store.exc import EntryNotFoundError
from cg.store.filters.status_analysis_filters import AnalysisFilter, apply_analysis_filter
Expand Down Expand Up @@ -1657,48 +1658,86 @@ def _get_related_samples_query(
SampleFilter.BY_CUSTOMER_ENTRY_IDS,
],
)
if samples.count() != 1:
samples: list[Sample] = samples.all()
raise CgDataError(
f"No unique DNA sample could be found: found {len(samples)} samples: {[sample.internal_id for sample in samples]}"
)
return samples

def get_uploaded_related_dna_cases(self, rna_case: Case) -> list[Case]:
"""Returns all uploaded DNA cases related to the given RNA case."""

related_dna_cases: list[Case] = []
collaborators: set[Customer] = rna_case.customer.collaborators
for rna_sample in rna_case.samples:

collaborators: set[Customer] = rna_sample.customer.collaborators
if not rna_sample.subject_id:
raise CgDataError(
f"Failed to link RNA sample {rna_sample.internal_id} to DNA samples - subject_id field is empty."
)

related_dna_samples_query: Query = self._get_related_samples_query(
sample=rna_sample,
prep_categories=DNA_PREP_CATEGORIES,
collaborators=collaborators,
)

dna_samples_cases_analysis_query: Query = (
related_dna_samples_query.join(Sample.links).join(CaseSample.case).join(Analysis)
customer_ids: list[int] = [customer.id for customer in collaborators]
uploaded_dna_cases: list[Case] = self._get_uploaded_dna_cases(
sample_query=related_dna_samples_query, customer_ids=customer_ids
)

dna_samples_cases_analysis_query: Query = apply_case_filter(
cases=dna_samples_cases_analysis_query,
workflows=DNA_WORKFLOWS_WITH_SCOUT_UPLOAD,
customer_entry_ids=[customer.id for customer in collaborators],
filter_functions=[
CaseFilter.BY_WORKFLOWS,
CaseFilter.BY_CUSTOMER_ENTRY_IDS,
],
related_dna_cases.extend(uploaded_dna_cases)
if not related_dna_cases:
raise CgDataError(
f"No matching uploaded DNA cases for case {rna_case.internal_id} ({rna_case.name})."
)
return related_dna_cases

uploaded_dna_cases: list[Case] = (
apply_analysis_filter(
analyses=dna_samples_cases_analysis_query,
filter_functions=[AnalysisFilter.IS_UPLOADED],
def _get_uploaded_dna_cases(self, sample_query: Query, customer_ids: list[int]) -> list[Case]:
    """Return the uploaded cases reachable from the samples in the given query.

    The sample query is joined to its case links, the linked cases and their
    analyses. Cases are then restricted to the workflows in
    DNA_WORKFLOWS_WITH_SCOUT_UPLOAD and to the given customer entry ids, and
    the analyses are restricted with the IS_UPLOADED filter. The remaining
    rows are returned as Case entities.
    """
    samples_with_analyses: Query = (
        sample_query.join(Sample.links).join(CaseSample.case).join(Analysis)
    )
    case_filters: list[CaseFilter] = [
        CaseFilter.BY_WORKFLOWS,
        CaseFilter.BY_CUSTOMER_ENTRY_IDS,
    ]
    scout_workflow_cases: Query = apply_case_filter(
        cases=samples_with_analyses,
        workflows=DNA_WORKFLOWS_WITH_SCOUT_UPLOAD,
        customer_entry_ids=customer_ids,
        filter_functions=case_filters,
    )
    uploaded_analyses: Query = apply_analysis_filter(
        analyses=scout_workflow_cases,
        filter_functions=[AnalysisFilter.IS_UPLOADED],
    )
    return uploaded_analyses.with_entities(Case).all()

def get_related_dna_cases_with_samples(self, rna_case: Case) -> list[RNADNACollection]:
collaborators = rna_case.customer.collaborators
collaborator_ids: list[int] = [collaborator.id for collaborator in collaborators]
rna_dna_collections: list[RNADNACollection] = []
for sample in rna_case.samples:
related_dna_samples: Query = self._get_related_samples_query(
sample=sample, prep_categories=DNA_PREP_CATEGORIES, collaborators=collaborators
)
nr_of_related_samples: int = related_dna_samples.count()
if nr_of_related_samples != 1:
raise CgDataError(
f"Failed to upload files for RNA case: unexpected number of DNA sample matches for subject_id: "
f"{sample.subject_id}. Number of matches: {nr_of_related_samples} "
)
.with_entities(Case)
.all()
dna_sample_name: str = related_dna_samples.first().name
dna_cases: list[Case] = self._get_uploaded_dna_cases(
sample_query=related_dna_samples, customer_ids=collaborator_ids
)

related_dna_cases.extend([case for case in uploaded_dna_cases])
if not related_dna_cases:
raise CaseNotFoundError(
f"No matching uploaded DNA cases for case {rna_case.internal_id} ({rna_case.name})."
dna_case_ids: list[str] = [case.internal_id for case in dna_cases]
collection = RNADNACollection(
rna_sample_id=sample.internal_id,
dna_sample_name=dna_sample_name,
dna_case_ids=dna_case_ids,
)
return related_dna_cases
rna_dna_collections.append(collection)
return rna_dna_collections
Loading
Loading