Skip to content

Commit

Permalink
1. Added get_genome_list method
Browse files Browse the repository at this point in the history
2. Added get_missing_plots method
3. updated changelog
4. updated version and requirements
  • Loading branch information
khoroshevskyi committed Nov 6, 2024
1 parent 3b5ef50 commit 75b84d2
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 8 deletions.
2 changes: 1 addition & 1 deletion bbconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.1"
__version__ = "0.9.0"
11 changes: 11 additions & 0 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ def get_stats(self) -> StatsReturn:
genomes_number=number_of_genomes,
)

def get_list_genomes(self) -> List[str]:
    """
    Get the distinct genome aliases stored for bed records in the database

    :return: list of genomes
    """
    distinct_genomes = select(distinct(Bed.genome_alias))
    with Session(self.config.db_engine.engine) as session:
        # scalars() yields the first (and only) selected column of every row
        return list(session.scalars(distinct_genomes))

@cached_property
def list_of_licenses(self) -> List[str]:
"""
Expand Down
6 changes: 4 additions & 2 deletions bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ class TokenizedBed(Base):
nullable=False,
)
universe_id: Mapped[str] = mapped_column(
ForeignKey("universes.id", ondelete="CASCADE"),
ForeignKey("universes.id", ondelete="CASCADE", passive_deletes=True),
primary_key=True,
index=True,
nullable=False,
Expand All @@ -350,7 +350,9 @@ class TokenizedBed(Base):

bed: Mapped["Bed"] = relationship("Bed", back_populates="tokenized")
universe: Mapped["Universes"] = relationship(
"Universes", back_populates="tokenized"
"Universes",
back_populates="tokenized",
passive_deletes=True,
)


Expand Down
2 changes: 1 addition & 1 deletion bbconf/models/bedset_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import List, Union
import datetime
from typing import List, Union

from pydantic import BaseModel, ConfigDict, model_validator

Expand Down
42 changes: 40 additions & 2 deletions bbconf/modules/bedfiles.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from logging import getLogger
from typing import Dict, Union
from typing import Dict, List, Union

import numpy as np
from geniml.bbclient import BBClient
Expand All @@ -10,7 +10,7 @@
from pydantic import BaseModel
from qdrant_client.models import Distance, PointIdsList, VectorParams
from sqlalchemy import and_, delete, func, select
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, aliased
from tqdm import tqdm

from bbconf.config_parser.bedbaseconfig import BedBaseConfig
Expand Down Expand Up @@ -1179,3 +1179,41 @@ def get_tokenized_link(
bed_id=bed_id,
universe_id=universe_id,
)

def get_missing_plots(
    self, plot_name: str, limit: int = 1000, offset: int = 0
) -> List[str]:
    """
    Get list of bed files that are missing a given plot

    Results are ordered by bed identifier so that consecutive
    limit/offset pages neither overlap nor skip records.

    :param plot_name: plot name; must be one of the fields of BedPlots
    :param limit: number of results to return
    :param offset: offset to start from
    :return: list of bed file identifiers
    :raises BedBaseConfError: if plot_name is not a field of BedPlots
    """
    valid_names = list(BedPlots.model_fields.keys())
    if plot_name not in valid_names:
        raise BedBaseConfError(
            f"Plot name: {plot_name} is not valid. Valid names: {valid_names}"
        )

    # Files rows whose name matches the requested plot
    plot_files = aliased(Files)
    has_plot = select(plot_files).where(plot_files.name == plot_name).subquery()

    # Anti-join: keep only bed records with no matching row in the subquery.
    # ORDER BY is required for LIMIT/OFFSET paging to be deterministic.
    query = (
        select(Bed.id)
        .outerjoin(has_plot, Bed.id == has_plot.c.bedfile_id)
        .where(has_plot.c.bedfile_id.is_(None))
        .order_by(Bed.id)
        .limit(limit)
        .offset(offset)
    )

    with Session(self._sa_engine) as session:
        # Materialize while the session is still open
        return list(session.scalars(query))
9 changes: 9 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

# [0.9.0] - 2024-11-06
## Changed
- Fixed bug with uploading tss dist plot

## Added
- Added annotations to bedsets (author, source)
- Added get_list_genomes method to the agent, which lists all available genomes
- Added method that lists all missing plots for bedfiles (get_missing_plots)

# [0.8.0] - 2024-10-23
## Changed
- Updated text to bed search (now using bivec)
Expand Down
13 changes: 12 additions & 1 deletion manual_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,20 @@ def get_pep():
prj


def get_id_plots_missing():
    """
    Manual check: print the bed identifiers returned by get_missing_plots
    for the "gccontent" plot, followed by the list of genomes.
    """
    from bbconf import BedBaseAgent

    bb_agent = BedBaseAgent(config="/home/bnt4me/virginia/repos/bedhost/config.yaml")

    missing_ids = bb_agent.bed.get_missing_plots("gccontent", limit=5000)
    print(missing_ids)
    print(bb_agent.get_list_genomes())


if __name__ == "__main__":
# zarr_s3()
# add_s3()
# get_from_s3()
# biocframe()
get_pep()
# get_pep()
get_id_plots_missing()
2 changes: 1 addition & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
yacman >= 0.9.1
sqlalchemy >= 2.0.0
geniml[ml] >= 0.5.2
geniml[ml] >= 0.5.1
psycopg >= 3.1.15
colorlogs
pydantic >= 2.9.0
Expand Down

0 comments on commit 75b84d2

Please sign in to comment.