Skip to content

MANE transcripts report #365

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Oct 7, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Improve report explanation to better interpret average coverage and coverage completeness stats shown on the coverage report
- Check that provided d4 files when running queries using `/coverage/d4/genes/summary` endpoint are valid, with test
- General report with coverage over the entire genome when no genes or genes panels are provided
- A MANE coverage report, showing coverage and coverage completeness only on MANE transcripts for the provided list of genes
### Changed
- Do not use stored cases/samples any more and run stats exclusively on d4 files paths provided by the user in real time
- How parameters are passed to starlette.templating since it was raising a deprecation warning.
Expand Down
10 changes: 5 additions & 5 deletions src/chanjo2/crud/intervals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import List, Optional, Union

from sqlalchemy import delete
from sqlalchemy import delete, or_
from sqlalchemy.orm import Session, query
from sqlalchemy.sql.expression import Delete

Expand Down Expand Up @@ -127,11 +127,11 @@ def _filter_transcripts_by_tag(
transcripts: query.Query, transcript_tags: List[TranscriptTag] = []
) -> query.Query:
"""Return transcripts which contain one or more RefSeq tag."""

not_null_filters = []
for tag in transcript_tags:
transcripts: query.Query = transcripts.filter(
getattr(SQLTranscript, tag).isnot(None)
)
return transcripts
not_null_filters.append(getattr(SQLTranscript, tag).isnot(None))
return transcripts.filter(or_(*not_null_filters))


def set_sql_intervals(
Expand Down
47 changes: 47 additions & 0 deletions src/chanjo2/endpoints/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from chanjo2.demo import DEMO_COVERAGE_QUERY_FORM
from chanjo2.meta.handle_report_contents import (
get_gene_overview_coverage_stats,
get_mane_overview_coverage_stats,
get_report_data,
)
from chanjo2.models.pydantic_models import (
Expand Down Expand Up @@ -118,3 +119,49 @@ async def gene_overview(
"levels": gene_overview_content["levels"],
},
)


@router.get("/mane_overview/demo", response_class=HTMLResponse)
async def demo_mane_overview(
    request: Request,
    db: Session = Depends(get_session),
):
    """Render the MANE transcripts coverage overview for the demo query.

    The query is built from ``DEMO_COVERAGE_QUERY_FORM``; its interval type and
    genome build are then overridden with transcripts and ``Builds.build_38``
    before the coverage stats are collected.
    """
    demo_query = ReportQuery.as_form(DEMO_COVERAGE_QUERY_FORM)
    # Override the values parsed from the demo form for this report
    demo_query.interval_type = IntervalType.TRANSCRIPTS
    demo_query.build = Builds.build_38

    page_context = get_mane_overview_coverage_stats(query=demo_query, session=db)
    return templates.TemplateResponse(
        request=request,
        name="mane-overview.html",
        context=page_context,
    )


@router.post("/mane_overview", response_class=HTMLResponse)
async def mane_overview(
    request: Request,
    samples: Annotated[str, Form()],
    completeness_thresholds: Annotated[Optional[str], Form()] = None,
    ensembl_gene_ids: Annotated[Optional[str], Form()] = None,
    hgnc_gene_ids: Annotated[Optional[str], Form()] = None,
    hgnc_gene_symbols: Annotated[Optional[str], Form()] = None,
    case_display_name: Annotated[Optional[str], Form()] = None,
    panel_name: Annotated[Optional[str], Form()] = "Custom panel",
    db: Session = Depends(get_session),
):
    """Render the MANE transcripts coverage overview for a user-provided query.

    The form parameters are declared with ``name: Annotated[type, Form()]`` so
    that FastAPI registers them as form fields; a default set with ``=`` marks a
    field as optional. Writing ``name=Annotated[...]`` instead would make the
    typing alias the parameter's default value and leave the field undeclared.

    The function body does not read these parameters: the report query is built
    from the raw submitted form via ``ReportQuery.as_form``.

    Raises:
        HTTPException: with status 422 when the submitted form does not validate
            as a ``ReportQuery``; the detail carries the validation errors as JSON.
    """
    try:
        overview_query = ReportQuery.as_form(await request.form())
    except ValidationError as ve:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=ve.json(),
        ) from ve

    return templates.TemplateResponse(
        request=request,
        name="mane-overview.html",
        context=get_mane_overview_coverage_stats(query=overview_query, session=db),
    )
82 changes: 81 additions & 1 deletion src/chanjo2/meta/handle_report_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from chanjo2.models import SQLExon, SQLGene, SQLTranscript
from chanjo2.models.pydantic_models import (
GeneCoverage,
Builds,
GeneReportForm,
IntervalType,
ReportQuery,
Expand Down Expand Up @@ -232,3 +232,83 @@ def get_gene_overview_coverage_stats(form_data: GeneReportForm, session: Session
completeness_thresholds=form_data.completeness_thresholds,
)
return gene_stats


def get_mane_overview_coverage_stats(query: ReportQuery, session: Session) -> Dict:
    """Build the template context for the MANE transcripts coverage overview.

    Genes are fetched for ``Builds.build_38`` whenever the query carries Ensembl
    IDs, HGNC IDs or HGNC symbols. Transcripts are then requested for those genes
    with the MANE Select and MANE Plus Clinical transcript tags, and coverage
    stats are collected for them over the query samples.

    The returned dictionary contains the keys ``levels``, ``extras``,
    ``interval_type`` and ``mane_coverage_stats``. The latter is a list of
    ``(gene symbol, data)`` tuples, with one entry per distinct combination of
    MANE Select and MANE Plus Clinical identifiers.
    """
    set_samples_coverage_files(session=session, samples=query.samples)

    has_gene_filter = any(
        [query.ensembl_gene_ids, query.hgnc_gene_ids, query.hgnc_gene_symbols]
    )
    genes: List[SQLGene] = (
        get_genes(
            db=session,
            build=Builds.build_38,
            ensembl_ids=query.ensembl_gene_ids,
            hgnc_ids=query.hgnc_gene_ids,
            hgnc_symbols=query.hgnc_gene_symbols,
            limit=None,
        )
        if has_gene_filter
        else []
    )

    # Index the genes by Ensembl ID so each transcript can be linked to its gene
    genes_by_ensembl_id = {}
    found_hgnc_ids = []
    for gene in genes:
        found_hgnc_ids.append(gene.hgnc_id)
        genes_by_ensembl_id[gene.ensembl_id] = gene

    ordered_levels = get_ordered_levels(threshold_levels=query.completeness_thresholds)
    # When no gene was found, fall back to the identifiers given in the query
    displayed_gene_ids = (
        found_hgnc_ids
        or query.hgnc_gene_ids
        or query.hgnc_gene_symbols
        or query.ensembl_gene_ids
    )
    extras = {
        "hgnc_gene_ids": displayed_gene_ids,
        "interval_type": query.interval_type.value,
        "completeness_thresholds": query.completeness_thresholds,
        "samples": [_serialize_sample(sample) for sample in query.samples],
        "panel_name": query.panel_name,
    }
    report_rows = []
    mane_stats = {
        "levels": ordered_levels,
        "extras": extras,
        "interval_type": IntervalType.TRANSCRIPTS,
        "mane_coverage_stats": report_rows,
    }

    mane_transcripts = (
        set_sql_intervals(
            db=session,
            interval_type=SQLTranscript,
            genes=genes,
            transcript_tags=[
                TranscriptTag.REFSEQ_MANE_SELECT,
                TranscriptTag.REFSEQ_MANE_PLUS_CLINICAL,
            ],
        )
        if genes
        else []
    )

    stats_by_transcript = get_gene_overview_stats(
        sql_intervals=mane_transcripts,
        samples=query.samples,
        completeness_thresholds=query.completeness_thresholds,
    )

    # Report each (MANE Select, MANE Plus Clinical) combination only once
    reported_mane_pairs = set()
    for transcript in mane_transcripts:
        mane_pair = (
            transcript.refseq_mane_select,
            transcript.refseq_mane_plus_clinical,
        )
        if mane_pair in reported_mane_pairs:
            continue
        reported_mane_pairs.add(mane_pair)

        gene = genes_by_ensembl_id[transcript.ensembl_gene_id]
        transcript_data: dict = {
            "gene": {
                "hgnc_id": gene.hgnc_id,
                "ensembl_id": gene.ensembl_id,
            },
            "transcript": {
                "mane_select": mane_pair[0],
                "mane_plus_clinical": mane_pair[1],
            },
            "stats": stats_by_transcript[transcript.ensembl_id],
        }
        report_rows.append((gene.hgnc_symbol, transcript_data))

    return mane_stats
114 changes: 114 additions & 0 deletions src/chanjo2/templates/mane-overview.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{% extends "base-layout.html" %}

{% block css %}
{{ super() }}
<style>
.badge {
background-color: black;
color: white;
padding: 4px 8px;
text-align: center;
border-radius: 5px;
float: right;
}
</style>
{% endblock %}

{% macro report_filters() %}
{# Collapsible "Customize" panel: re-submits the current samples to the MANE overview endpoint with an edited gene list and panel name. #}
<div class="accordion" id="filter-accordion">
  <div class="accordion-item">
    <h2 class="accordion-header" id="flush-headingOne">
      <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#flush-collapseOne" aria-expanded="false" aria-controls="flush-collapseOne">
        Customize
      </button>
    </h2>
    {# data-bs-parent must reference the id of the enclosing accordion element #}
    <div id="flush-collapseOne" class="accordion-collapse collapse" aria-labelledby="flush-headingOne" data-bs-parent="#filter-accordion">
      <div class="accordion-body">
        <!-- hidden fields passed from previous query -->
        <form name="customizeForm" action="{{url_for('mane_overview')}}" method="post">
          <input type="hidden" name="build" value="GRCh38"/>
          <input type="hidden" name="interval_type" value="transcripts"/>
          {# No "safe" filter here: an unescaped quote in the samples data would terminate the attribute. Browsers decode escaped attribute values before submitting the form. #}
          <input type="hidden" name="samples" value="{{ extras.samples }}"/>

          <div class="row">
            <div class="col-6">
              <label class="form-label">Included genes (Comma separated list HGNC IDs, HGNC symbols or Ensembl IDs)
                <input class="form-control" type="text" name="hgnc_gene_ids" value="{{ extras.hgnc_gene_ids|join(', ') }}" placeholder="17284, 21022,..">
              </label>
            </div>
            <div class="col-4">
              <label class="form-label">Gene panel name to display
                <input class="form-control" id="panel_name" name="panel_name" type="text" placeholder="Skeletal dysplasia 3.2" value="{{ extras.panel_name or '' }}">
              </label>
            </div>
            <div class="col-2">
              <button class="btn btn-primary mt-4" type="submit">Update</button>
            </div>
          </div>
        </form>
      </div>
    </div>
  </div>
</div>
{% endmacro %}


{% macro mane_stats_macro() %}
<h2>MANE Transcripts coverage report</h2>
{% if extras.panel_name %}
<p>Based on gene panel: <strong>{{ extras.panel_name }}</strong></p>
{% endif %}
<br>
{% for gene_id, samples_stats in mane_coverage_stats|sort(attribute='0') %}
<tr>
<td class="row">
<div class="panel-default">
<div class="panel-heading">
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code indentation seems off here and below

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks OK on the editor though, weird!

image

I think I'll leave it as it is

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be mixed tabs and spaces? (Anyway, not a big issue)

<strong>Gene {{ gene_id or samples_stats.gene.hgnc_id }}</strong>
{% if samples_stats.transcript.mane_select %}
<span class="badge">MANE Select: {{samples_stats.transcript.mane_select}}</span>
{% endif %}
{% if samples_stats.transcript.mane_plus_clinical %}
<span class="badge">MANE Plus Clinical: {{samples_stats.transcript.mane_plus_clinical}}</span>
{% endif %}

</div>
</div>
<div class="table-responsive">
<table class="table table-bordered">
<thead>
<th>Sample</th>
<th>Mean coverage</th>
{% for level, _ in levels.items() %}
<th>Completeness {{ level }}x [%]</th>
{% endfor %}
</thead>
<tbody>
{% for data_tuple in samples_stats.stats %}
<tr>
<td>{{data_tuple[0]}}</td>
<td>{{data_tuple[1]|round(2) }}</td>
{% for level, _ in levels.items() %}
<td>{{ (data_tuple[2][level] * 100)|round(2) }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
</td>
</tr>
<br>
{% else %}
No MANE transcripts found in database for the provided gene list.
{% endfor %}
{% endmacro %}

{% block title %}
<title>Chanjo2 MANE transcripts coverage overview</title>
{% endblock %}

{% block content %}
{{report_filters() }}
{{ mane_stats_macro() }}
{% endblock %}
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ class Endpoints(str):
REPORT = "/report"
GENE_OVERVIEW = "/gene_overview"
OVERVIEW = "/overview"
OVERVIEW_DEMO = "/overview/demo/"
OVERVIEW_DEMO = "/overview/demo"
MANE_OVERVIEW_DEMO = "/mane_overview/demo"
MANE_OVERVIEW = "/mane_overview"


@pytest.fixture
Expand Down
43 changes: 42 additions & 1 deletion tests/src/chanjo2/endpoints/test_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from fastapi.testclient import TestClient
from requests.models import Response

from chanjo2.constants import BUILD_37, DEFAULT_COMPLETENESS_LEVELS
from chanjo2.constants import BUILD_37, BUILD_38, DEFAULT_COMPLETENESS_LEVELS
from chanjo2.demo import DEMO_COVERAGE_QUERY_FORM


Expand Down Expand Up @@ -63,3 +63,44 @@ def test_gene_overview(

# And return an HTML page
assert response.template.name == "gene-overview.html"


def test_demo_mane_overview(client: TestClient, endpoints: Type):
    """Check that the demo MANE overview endpoint renders the MANE overview page."""

    # GIVEN the demo MANE transcripts coverage overview endpoint
    demo_url = endpoints.MANE_OVERVIEW_DEMO

    # WHEN sending a GET request to it
    response: Response = client.get(demo_url)

    # THEN the request should be successful
    assert response.status_code == status.HTTP_200_OK

    # AND the page should be rendered from the MANE overview template
    assert response.template.name == "mane-overview.html"


def test_mane_overview(
    client: TestClient, endpoints: Type, genomic_ids_per_build: Dict[str, List]
):
    """Test the endpoint that shows coverage over the MANE transcripts for a custom list of genes."""

    # GIVEN form data describing a custom list of genes.
    # The key must be "hgnc_gene_ids" (plural): that is the form field the
    # endpoint declares, so any other spelling leaves the gene list out of the query.
    form_data = {
        "build": BUILD_38,
        "completeness_thresholds": DEFAULT_COMPLETENESS_LEVELS,
        "hgnc_gene_ids": genomic_ids_per_build[BUILD_38]["hgnc_ids"],
        "samples": str(DEMO_COVERAGE_QUERY_FORM["samples"]),
        "interval_type": "transcripts",
    }

    # WHEN posting the form to the MANE overview endpoint
    response: Response = client.post(
        endpoints.MANE_OVERVIEW,
        data=form_data,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )

    # THEN the request should be successful
    assert response.status_code == status.HTTP_200_OK

    # AND the page should be rendered from the MANE overview template
    assert response.template.name == "mane-overview.html"
Loading