Skip to content

Commit

Permalink
Updated tests and qdrant upload function
Browse files (browse the repository at this point in the history)
  • Loading branch information
khoroshevskyi committed Oct 19, 2023
1 parent 45b538c commit d03163b
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 12 deletions.
30 changes: 24 additions & 6 deletions bbconf/bbconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
CFG_QDRANT_COLLECTION_NAME_KEY,
DEFAULT_HF_MODEL,
DEFAULT_VEC2VEC_MODEL,
DEFAULT_REGION2_VEC_MODEL,
)
from bbconf.exceptions import MissingConfigDataError, BedBaseConfError
from bbconf.helpers import raise_missing_key, get_bedbase_cfg
Expand Down Expand Up @@ -103,14 +104,19 @@ def __init__(self, config_path: str = None, database_only: bool = False):
self._t2bsi = self._create_t2bsi_object()
else:
if not self.config[CFG_PATH_KEY].get(CFG_PATH_REGION2VEC_KEY):
_LOGGER.error(
f"{CFG_PATH_REGION2VEC_KEY} was not provided in config file!"
_LOGGER.debug(
f"{CFG_PATH_REGION2VEC_KEY} was not provided in config file! Using default.."
)
if not self.config[CFG_PATH_KEY].get(CFG_PATH_VEC2VEC_KEY):
self.config[CFG_PATH_KEY][
CFG_PATH_VEC2VEC_KEY
] = DEFAULT_VEC2VEC_MODEL

if self.config[CFG_QDRANT_KEY].get(CFG_QDRANT_API_KEY, None):
os.environ["QDRANT_API_KEY"] = self.config[CFG_QDRANT_KEY].get(
CFG_QDRANT_API_KEY
)

except qdrant_client.http.exceptions.ResponseHandlingException as err:
_LOGGER.error(f"error in Connection to qdrant! skipping... Error: {err}")

Expand Down Expand Up @@ -470,22 +476,34 @@ def _create_t2bsi_object(self) -> text2bednn.Text2BEDSearchInterface:
def add_bed_to_qdrant(
self,
bed_id: str,
bed_file_path: str,
bed_file: Union[str, RegionSet],
payload: dict = None,
region_to_vec: Region2VecExModel = None,
) -> None:
"""
Convert bed file to vector and add it to qdrant database
:param bed_id: bed file id
:param bed_file_path: path to the bed file
:param bed_file: path to the bed file, or RegionSet object
:param payload: additional metadata to store alongside vectors
:param region_to_vec: initiated region to vector model. If None, new object will be created.
:return: None
"""

_LOGGER.info(f"Adding bed file to qdrant. bed_id: {bed_id}")
# Convert bedfile to vector
bed_region_set = RegionSet(bed_file_path)
reg_2_vec_obj = Region2VecExModel("databio/r2v-ChIP-atlas-hg38")
if isinstance(bed_file, str):
bed_region_set = RegionSet(bed_file)
elif isinstance(bed_file, RegionSet):
bed_region_set = bed_file
else:
raise BedBaseConfError(
"Could not add add region to qdrant. Invalid type, or path. "
)
if not region_to_vec:
reg_2_vec_obj = Region2VecExModel(DEFAULT_REGION2_VEC_MODEL)
else:
reg_2_vec_obj = region_to_vec
bed_embedding = reg_2_vec_obj.encode(
bed_region_set,
pool="mean",
Expand Down
1 change: 1 addition & 0 deletions bbconf/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@

DEFAULT_HF_MODEL = "sentence-transformers/all-MiniLM-L12-v2"
DEFAULT_VEC2VEC_MODEL = "databio/v2v-ChIP-atlas-hg38-ATAC"
DEFAULT_REGION2_VEC_MODEL = "databio/r2v-ChIP-atlas-hg38"
15 changes: 10 additions & 5 deletions tests/data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@ path:
bedstat_dir: bedstat_output
bedbuncher_dir: bedbuncher_output
remote_url_base: null
region2vec: "add/path/here"
vec2vec: "add/path/here"
server:
host: 0.0.0.0
port: 8000
qdrant:
host: test_localhost
host: localhost
port: 6333
api_key: default_api_key
collection: bedbase
api_key: None
collection: bedbase
remotes:
http:
prefix: https://data2.bedbase.org/
description: HTTP compatible path
s3:
prefix: s3://data2.bedbase.org/
description: S3 compatible path
2 changes: 1 addition & 1 deletion tests/test_bbconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from bbconf import BedBaseConf, get_bedbase_cfg
from bbconf.exceptions import *
from sqlmodel import SQLModel, create_engine
from sqlmodel import Session, SQLModel, create_engine
from sqlmodel.main import default_registry


Expand Down

0 comments on commit d03163b

Please sign in to comment.