Skip to content

Commit a4a3024

Browse files
Merge pull request #59 from databio/metadata
Added metadata and gen valid tables
2 parents cdb952a + 94e60d0 commit a4a3024

File tree

5 files changed

+83
-4
lines changed

5 files changed

+83
-4
lines changed

bbconf/db_utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33
from typing import List, Optional
44

55
import pandas as pd
6-
from sqlalchemy import TIMESTAMP, BigInteger, ForeignKey, Result, Select, event, select
6+
from sqlalchemy import (
7+
TIMESTAMP,
8+
BigInteger,
9+
ForeignKey,
10+
Result,
11+
Select,
12+
event,
13+
select,
14+
UniqueConstraint,
15+
)
716
from sqlalchemy.dialects.postgresql import JSON
817
from sqlalchemy.engine import URL, Engine, create_engine
918
from sqlalchemy.event import listens_for
@@ -118,6 +127,10 @@ class Bed(Base):
118127
)
119128
license_mapping: Mapped["License"] = relationship("License", back_populates="bed")
120129

130+
ref_classifier: Mapped["GenomeRefStats"] = relationship(
131+
"GenomeRefStats", back_populates="bed", cascade="all, delete-orphan"
132+
)
133+
121134

122135
class BedMetadata(Base):
123136
__tablename__ = "bed_metadata"
@@ -351,6 +364,32 @@ class License(Base):
351364
bed: Mapped[List["Bed"]] = relationship("Bed", back_populates="license_mapping")
352365

353366

367+
class GenomeRefStats(Base):
368+
__tablename__ = "genome_ref_stats"
369+
370+
id: Mapped[int] = mapped_column(primary_key=True, index=True)
371+
372+
bed_id: Mapped[str] = mapped_column(
373+
ForeignKey("bed.id", ondelete="CASCADE"),
374+
index=True,
375+
nullable=False,
376+
)
377+
provided_genome: Mapped[str]
378+
compared_genome: Mapped[str] = mapped_column(
379+
nullable=False, comment="Compared Genome"
380+
)
381+
382+
xs: Mapped[float] = mapped_column(nullable=True, default=None)
383+
oobr: Mapped[float] = mapped_column(nullable=True, default=None)
384+
sequence_fit: Mapped[float] = mapped_column(nullable=True, default=None)
385+
assigned_points: Mapped[int] = mapped_column(nullable=False)
386+
tier_ranking: Mapped[int] = mapped_column(nullable=False)
387+
388+
bed: Mapped["Bed"] = relationship("Bed", back_populates="ref_classifier")
389+
390+
__table_args__ = (UniqueConstraint("bed_id", "compared_genome"),)
391+
392+
354393
@listens_for(Universes, "after_insert")
355394
@listens_for(Universes, "after_update")
356395
def add_bed_universe(mapper, connection, target):

bbconf/models/bed_models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,15 @@ class TokenizedPathResponse(BaseModel):
219219
universe_id: str
220220
file_path: str
221221
endpoint_url: str
222+
223+
224+
class RefGenValidModel(BaseModel):
225+
provided_genome: str
226+
compared_genome: str
227+
xs: float = 0.0
228+
oobr: Union[float, None] = None
229+
sequence_fit: Union[float, None] = None
230+
assigned_points: int
231+
tier_ranking: int
232+
233+
model_config = ConfigDict(extra="forbid")

bbconf/modules/bedfiles.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
from logging import getLogger
33
from typing import Dict, Union
4+
from pydantic import BaseModel
45

56
import numpy as np
67
from geniml.bbclient import BBClient
@@ -14,7 +15,15 @@
1415

1516
from bbconf.config_parser.bedbaseconfig import BedBaseConfig
1617
from bbconf.const import DEFAULT_LICENSE, PKG_NAME, ZARR_TOKENIZED_FOLDER
17-
from bbconf.db_utils import Bed, BedStats, Files, TokenizedBed, Universes, BedMetadata
18+
from bbconf.db_utils import (
19+
Bed,
20+
BedStats,
21+
Files,
22+
TokenizedBed,
23+
Universes,
24+
BedMetadata,
25+
GenomeRefStats,
26+
)
1827
from bbconf.exceptions import (
1928
BedBaseConfError,
2029
BedFIleExistsError,
@@ -43,6 +52,7 @@
4352
TokenizedPathResponse,
4453
UniverseMetadata,
4554
StandardMeta,
55+
RefGenValidModel,
4656
)
4757

4858
_LOGGER = getLogger(PKG_NAME)
@@ -393,6 +403,7 @@ def add(
393403
plots: dict = None,
394404
files: dict = None,
395405
classification: dict = None,
406+
ref_validation: Dict[str, BaseModel] = None,
396407
license_id: str = DEFAULT_LICENSE,
397408
upload_qdrant: bool = False,
398409
upload_pephub: bool = False,
@@ -410,6 +421,7 @@ def add(
410421
:param plots: bed file plots
411422
:param files: bed file files
412423
:param classification: bed file classification
424+
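:param ref_validation: reference genome validation data: dict mapping each compared genome name to a pydantic model with its validation stats (stored via RefGenValidModel)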
413425
:param license_id: bed file license id (default: 'DUO:0000042'). Full list of licenses:
414426
https://raw.githubusercontent.com/EBISPOT/DUO/master/duo.csv
415427
:param upload_qdrant: add bed file to qdrant indexes
@@ -532,6 +544,16 @@ def add(
532544
session.add(new_bedstat)
533545
session.add(new_metadata)
534546

547+
for ref_gen_check, data in ref_validation.items():
548+
new_gen_ref = GenomeRefStats(
549+
**RefGenValidModel(
550+
**data.model_dump(),
551+
provided_genome=classification.genome_alias,
552+
compared_genome=ref_gen_check,
553+
).model_dump(),
554+
bed_id=identifier,
555+
)
556+
session.add(new_gen_ref)
535557
session.commit()
536558

537559
return None

docs/changelog.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
44

5+
# [0.7.0] - 2024-09-20
6+
## Added
7+
- Table and methods for reference genome validator
8+
- Table with standard metadata schema
9+
- Bed file opening improvements
10+
511
# [0.6.1] - 2024-08-21
612
## Added
713

requirements/requirements-all.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
yacman >= 0.9.1
22
sqlalchemy >= 2.0.0
3-
geniml >= 0.4.0
3+
geniml >= 0.4.1
44
psycopg >= 3.1.15
55
colorlogs
66
pydantic >= 2.6.4
77
botocore
88
boto3 >= 1.34.54
9-
pephubclient >= 0.4.1
9+
pephubclient >= 0.4.4
1010
sqlalchemy_schemadisplay
1111
zarr
1212
pyyaml >= 6.0.1 # for s3fs because of the errors

0 commit comments

Comments
 (0)