Skip to content

Commit

Permalink
Merge pull request #47 from databio/dev
Browse files Browse the repository at this point in the history
release 0.5.1
  • Loading branch information
khoroshevskyi authored Apr 11, 2024
2 parents 6599131 + 4ca33d1 commit 0ca45e1
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 59 deletions.
2 changes: 1 addition & 1 deletion bbconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.0"
__version__ = "0.5.1"
6 changes: 6 additions & 0 deletions bbconf/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ class BedSetNotFoundError(BedBaseConfError):
"""Error type for missing bedset"""

pass


class BedSetExistsError(BedBaseConfError):
    """Raised when a bedset with the given identifier already exists in the database."""
2 changes: 0 additions & 2 deletions bbconf/models/bedset_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,4 @@ class BedSetListResult(BaseModel):

class BedSetBedFiles(BaseModel):
    # Response model: the list of bed-file metadata records belonging to a bedset.
    count: int  # number of records in `results` (set via len(results) at the call site)
    limit: int  # NOTE(review): shown as a deletion in this changeset — confirm it is absent from the final model
    offset: int  # NOTE(review): shown as a deletion in this changeset — confirm it is absent from the final model
    results: List[BedMetadata]
13 changes: 9 additions & 4 deletions bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,10 +384,15 @@ def add(
_LOGGER.info("upload_pephub set to false. Skipping pephub..")

if upload_qdrant:
self.upload_file_qdrant(
identifier, files.bed_file.path, {"bed_id": identifier}
)
_LOGGER.info(f"File uploaded to qdrant. {identifier}")
if classification.genome_alias == "hg38":
self.upload_file_qdrant(
identifier, files.bed_file.path, {"bed_id": identifier}
)
_LOGGER.info(f"File uploaded to qdrant. {identifier}")
else:
_LOGGER.warning(
f"Could not upload to qdrant. Genome: {classification.genome_alias} is not supported."
)
else:
_LOGGER.info("upload_qdrant set to false. Skipping qdrant..")

Expand Down
100 changes: 50 additions & 50 deletions bbconf/modules/bedsets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import logging

# TODO: will be available in the next geniml release
# from geniml.io.utils import compute_md5sum_bedset
from hashlib import md5
from geniml.io.utils import compute_md5sum_bedset
from typing import Dict, List

from sqlalchemy import Float, Numeric, func, or_, select
Expand All @@ -11,7 +9,10 @@
from bbconf.config_parser import BedBaseConfig
from bbconf.const import PKG_NAME
from bbconf.db_utils import BedFileBedSetRelation, BedSets, BedStats, Files
from bbconf.exceptions import BEDFileNotFoundError, BedSetNotFoundError
from bbconf.exceptions import (
BedSetNotFoundError,
BedSetExistsError,
)
from bbconf.models.bed_models import BedStatsModel
from bbconf.models.bedset_models import (
BedSetBedFiles,
Expand All @@ -20,8 +21,8 @@
BedSetPlots,
BedSetStats,
FileModel,
BedMetadata,
)
from bbconf.modules.bedfiles import BedAgentBedFile

_LOGGER = logging.getLogger(PKG_NAME)

Expand Down Expand Up @@ -166,6 +167,7 @@ def create(
upload_s3: bool = False,
local_path: str = "",
no_fail: bool = False,
overwrite: bool = False,
) -> None:
"""
Create bedset in the database.
Expand All @@ -180,6 +182,7 @@ def create(
:param upload_s3: upload bedset to s3
:param local_path: local path to the output files
:param no_fail: do not raise an error if bedset already exists
:param overwrite: overwrite the record in the database
:return: None
"""
_LOGGER.info(f"Creating bedset '{identifier}'")
Expand All @@ -188,6 +191,10 @@ def create(
stats = self._calculate_statistics(bedid_list)
else:
stats = None
if self.exists(identifier):
if not overwrite and not no_fail:
raise BedSetExistsError(identifier)
self.delete(identifier)

if upload_pephub:
try:
Expand All @@ -203,8 +210,7 @@ def create(
description=description,
bedset_means=stats.mean.model_dump() if stats else None,
bedset_standard_deviation=stats.sd.model_dump() if stats else None,
# md5sum=compute_md5sum_bedset(bedid_list),
md5sum=md5("".join(bedid_list).encode()).hexdigest(),
md5sum=compute_md5sum_bedset(bedid_list),
)

if upload_s3:
Expand All @@ -213,24 +219,31 @@ def create(
identifier, files=plots, base_path=local_path, type="bedsets"
)

with Session(self._db_engine.engine) as session:
session.add(new_bedset)

for bedfile in bedid_list:
session.add(
BedFileBedSetRelation(bedset_id=identifier, bedfile_id=bedfile)
)
if upload_s3:
for k, v in plots:
if v:
new_file = Files(
**v.model_dump(exclude_none=True, exclude_unset=True),
bedset_id=identifier,
type="plot",
)
session.add(new_file)

session.commit()
try:
with Session(self._db_engine.engine) as session:
session.add(new_bedset)

if no_fail:
bedid_list = list(set(bedid_list))
for bedfile in bedid_list:
session.add(
BedFileBedSetRelation(bedset_id=identifier, bedfile_id=bedfile)
)
if upload_s3:
for k, v in plots:
if v:
new_file = Files(
**v.model_dump(exclude_none=True, exclude_unset=True),
bedset_id=identifier,
type="plot",
)
session.add(new_file)

session.commit()
except Exception as e:
_LOGGER.error(f"Failed to create bedset: {e}")
if not no_fail:
raise e

_LOGGER.info(f"Bedset '{identifier}' was created successfully")
return None
Expand Down Expand Up @@ -263,8 +276,10 @@ def _calculate_statistics(self, bed_ids: List[str]) -> BedSetStats:
).cast(Float)
).where(BedStats.id.in_(bed_ids))

bedset_sd[column_name] = session.execute(mean_bedset_statement).one()[0]
bedset_mean[column_name] = session.execute(sd_bedset_statement).one()[0]
bedset_sd[column_name] = session.execute(sd_bedset_statement).one()[0]
bedset_mean[column_name] = session.execute(mean_bedset_statement).one()[
0
]

bedset_stats = BedSetStats(
mean=bedset_mean,
Expand Down Expand Up @@ -341,41 +356,26 @@ def get_ids_list(
results=result_list,
)

def get_bedset_bedfiles(
self, identifier: str, full: bool = False, limit: int = 100, offset: int = 0
) -> BedSetBedFiles:
def get_bedset_bedfiles(self, identifier: str) -> BedSetBedFiles:
"""
Get list of bedfiles in bedset.
:param identifier: bedset identifier
:param full: return full records with stats, plots, files and metadata
:param limit: limit of results
:param offset: offset of results
:return: list of bedfiles
"""
bed_object = BedAgentBedFile(self.config)

statement = (
select(BedFileBedSetRelation)
.where(BedFileBedSetRelation.bedset_id == identifier)
.limit(limit)
.offset(offset)
)
statement = select(BedSets).where(BedSets.id == identifier)

with Session(self._db_engine.engine) as session:
bedfiles = session.execute(statement).all()
results = []
for bedfile in bedfiles:
try:
results.append(bed_object.get(bedfile[0].bedfile_id, full=full))
except BEDFileNotFoundError as _:
_LOGGER.error(f"Bedfile {bedfile[0].bedfile_id} not found")
bedset_obj = session.scalar(statement)
bedfiles_list = bedset_obj.bedfiles

results = [
BedMetadata(**bedfile.bedfile.__dict__) for bedfile in bedfiles_list
]

return BedSetBedFiles(
count=len(results),
limit=limit,
offset=offset,
results=results,
)

Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

# [0.5.1] - 2024-04-09
### Changed

- updated the qdrant uploader (uploads only hg38 files; other genomes are skipped with a warning)
- improved the bedset bed-file list query
- other minor fixes to the upload workflow

# [0.5.0] - 2024-04-08
### Changed

Expand Down
2 changes: 0 additions & 2 deletions tests/test_bedset.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,6 @@ def test_get_get_bedset_bedfiles(self, bbagent_obj):
result = bbagent_obj.bedset.get_bedset_bedfiles(BEDSET_TEST_ID)

assert result.count == 1
assert result.limit == 100
assert result.offset == 0
assert len(result.results) == 1

def test_delete(self, bbagent_obj, mocker):
Expand Down

0 comments on commit 0ca45e1

Please sign in to comment.