Skip to content

Commit

Permalink
Merge pull request #422 from Clinical-Genomics/n_intervals_by_build
Browse files Browse the repository at this point in the history
Intervals count by build endpoint
  • Loading branch information
northwestwitch authored Feb 18, 2025
2 parents 8e049a1 + 700a796 commit 5fc1747
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 13 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
## [unreleased]
### Added
- `/intervals/intervals_count_by_build` endpoint, which returns the number of genes, transcripts and exons for each genome build
### Changed
- Updated schug library to v1.10
- Disable SQLAlchemy logger
Expand Down
29 changes: 20 additions & 9 deletions src/chanjo2/crud/intervals.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
import logging
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union

from sqlalchemy import delete, or_, text
from sqlalchemy import delete, func, or_, text
from sqlalchemy.orm import Session, query
from sqlalchemy.sql.expression import Delete

from chanjo2.models import SQLExon, SQLGene, SQLTranscript
from chanjo2.models.pydantic_models import (
Builds,
ExonBase,
GeneBase,
TranscriptBase,
TranscriptTag,
)
from chanjo2.models.pydantic_models import Builds, TranscriptBase, TranscriptTag

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -192,3 +186,20 @@ def bulk_insert_exons(db: Session, exons: List[SQLExon]) -> None:
"""Bulk insert exons into the database."""
db.bulk_save_objects(exons)
db.commit()


def get_interval_counts(db: Session) -> Dict:
    """Count the genes, transcripts and exons stored for each build.

    For every value returned by ``Builds.get_enum_values()`` one COUNT query
    per interval table is issued, filtered on that table's ``build`` column.

    Returns:
        A dictionary keyed by build value. Each entry is a dictionary with the
        keys "number_of_genes", "number_of_transcripts" and "number_of_exons"
        holding the scalar result of the corresponding query.
    """
    # Response key -> interval model; insertion order fixes the query order.
    models_by_key = {
        "number_of_genes": SQLGene,
        "number_of_transcripts": SQLTranscript,
        "number_of_exons": SQLExon,
    }
    return {
        build: {
            key: db.query(func.count(model.id)).filter(model.build == build).scalar()
            for key, model in models_by_key.items()
        }
        for build in Builds.get_enum_values()
    }
12 changes: 9 additions & 3 deletions src/chanjo2/endpoints/intervals.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from typing import Iterator, List, Optional
from typing import Iterator, List

from fastapi import APIRouter, Depends, HTTPException, Response, status
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from chanjo2.constants import MULTIPLE_PARAMS_NOT_SUPPORTED_MSG
from chanjo2.crud.intervals import get_gene_intervals, get_genes
from chanjo2.crud.intervals import get_gene_intervals, get_genes, get_interval_counts
from chanjo2.dbutil import get_session
from chanjo2.meta.handle_bed import resource_lines
from chanjo2.meta.handle_load_intervals import (
update_exons,
update_genes,
update_transcripts,
)
from chanjo2.models import SQLExon, SQLGene, SQLTranscript
from chanjo2.models import SQLExon, SQLTranscript
from chanjo2.models.pydantic_models import (
Builds,
ExonBase,
Expand Down Expand Up @@ -184,3 +184,9 @@ async def exons(query: GeneIntervalQuery, session: Session = Depends(get_session
limit=query.limit if nr_filters == 0 else None,
interval_type=SQLExon,
)


@router.get("/intervals/intervals_count_by_build", response_model=dict)
def intervals_count_by_build(session: Session = Depends(get_session)):
    """Report how many genes, transcripts and exons the database holds for each genome build."""
    # The counting itself lives in the CRUD layer; this endpoint only forwards the session.
    counts_by_build = get_interval_counts(db=session)
    return counts_by_build
5 changes: 5 additions & 0 deletions src/chanjo2/models/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class IntervalType(str, Enum):
EXONS = "exons"
CUSTOM = "custom_intervals"

@staticmethod
def get_enum_values() -> List[str]:
    """List the values of all IntervalType members."""
    values = []
    for interval_type in IntervalType:
        values.append(interval_type.value)
    return values


class TranscriptTag(str, Enum):
REFSEQ_MRNA = "refseq_mrna"
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Endpoints(str):
TRANSCRIPTS = "/intervals/transcripts"
LOAD_EXONS = "/intervals/load/exons/"
EXONS = "/intervals/exons"
INTERVALS_BY_BUILD = "/intervals/intervals_count_by_build"
INTERVAL_COVERAGE = "/coverage/d4/interval/"
INTERVALS_FILE_COVERAGE = "/coverage/d4/interval_file/"
GENES_COVERAGE_SUMMARY = "/coverage/d4/genes/summary"
Expand Down
28 changes: 27 additions & 1 deletion tests/src/chanjo2/endpoints/test_intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@

from chanjo2.constants import MULTIPLE_PARAMS_NOT_SUPPORTED_MSG
from chanjo2.meta.handle_bed import resource_lines
from chanjo2.models.pydantic_models import Builds, ExonBase, GeneBase, TranscriptBase
from chanjo2.models.pydantic_models import (
Builds,
ExonBase,
GeneBase,
IntervalType,
TranscriptBase,
)
from chanjo2.populate_demo import (
BUILD_EXONS_RESOURCE,
BUILD_GENES_RESOURCE,
Expand Down Expand Up @@ -458,3 +464,23 @@ def test_exons_by_hgnc_symbols(
assert response.status_code == status.HTTP_200_OK
exons = response.json()
assert ExonBase(**exons[0])


def test_intervals_count_by_build(demo_client: TestClient, endpoints: Type):
    """Check that the counts-by-build endpoint reports an integer count of genes, transcripts and exons for every genome build."""

    # GIVEN a populated demo database
    # WHEN the "intervals_count_by_build" endpoint is queried with a GET request
    response: Response = demo_client.get(endpoints.INTERVALS_BY_BUILD)

    # THEN the request should succeed
    assert response.status_code == status.HTTP_200_OK
    counts_by_build: dict = response.json()

    # AND every build should expose an integer count per interval type
    for build in Builds.get_enum_values():
        for interval_type in IntervalType.get_enum_values():
            # Custom intervals are not part of the response, so they are not checked
            if interval_type != "custom_intervals":
                assert isinstance(
                    counts_by_build[build][f"number_of_{interval_type}"], int
                )

0 comments on commit 5fc1747

Please sign in to comment.