diff --git a/bbconf/bbconf.py b/bbconf/bbconf.py index 4cdfe8b..c31a26d 100644 --- a/bbconf/bbconf.py +++ b/bbconf/bbconf.py @@ -38,6 +38,7 @@ CFG_QDRANT_COLLECTION_NAME_KEY, DEFAULT_HF_MODEL, DEFAULT_VEC2VEC_MODEL, + DEFAULT_REGION2_VEC_MODEL, ) from bbconf.exceptions import MissingConfigDataError, BedBaseConfError from bbconf.helpers import raise_missing_key, get_bedbase_cfg @@ -103,14 +104,19 @@ def __init__(self, config_path: str = None, database_only: bool = False): self._t2bsi = self._create_t2bsi_object() else: if not self.config[CFG_PATH_KEY].get(CFG_PATH_REGION2VEC_KEY): - _LOGGER.error( - f"{CFG_PATH_REGION2VEC_KEY} was not provided in config file!" + _LOGGER.debug( + f"{CFG_PATH_REGION2VEC_KEY} was not provided in config file! Using default.." ) if not self.config[CFG_PATH_KEY].get(CFG_PATH_VEC2VEC_KEY): self.config[CFG_PATH_KEY][ CFG_PATH_VEC2VEC_KEY ] = DEFAULT_VEC2VEC_MODEL + if self.config[CFG_QDRANT_KEY].get(CFG_QDRANT_API_KEY, None): + os.environ["QDRANT_API_KEY"] = self.config[CFG_QDRANT_KEY].get( + CFG_QDRANT_API_KEY + ) + except qdrant_client.http.exceptions.ResponseHandlingException as err: _LOGGER.error(f"error in Connection to qdrant! skipping... Error: {err}") @@ -470,22 +476,34 @@ def _create_t2bsi_object(self) -> text2bednn.Text2BEDSearchInterface: def add_bed_to_qdrant( self, bed_id: str, - bed_file_path: str, + bed_file: Union[str, RegionSet], payload: dict = None, + region_to_vec: Region2VecExModel = None, ) -> None: """ Convert bed file to vector and add it to qdrant database :param bed_id: bed file id - :param bed_file_path: path to the bed file + :param bed_file: path to the bed file, or RegionSet object :param payload: additional metadata to store alongside vectors + :param region_to_vec: initiated region to vector model. If None, new object will be created. :return: None """ _LOGGER.info(f"Adding bed file to qdrant. bed_id: {bed_id}") # Convert bedfile to vector - bed_region_set = RegionSet(bed_file_path) - reg_2_vec_obj = Region2VecExModel("databio/r2v-ChIP-atlas-hg38") + if isinstance(bed_file, str): + bed_region_set = RegionSet(bed_file) + elif isinstance(bed_file, RegionSet): + bed_region_set = bed_file + else: + raise BedBaseConfError( + "Could not add add region to qdrant. Invalid type, or path. " + ) + if not region_to_vec: + reg_2_vec_obj = Region2VecExModel(DEFAULT_REGION2_VEC_MODEL) + else: + reg_2_vec_obj = region_to_vec bed_embedding = reg_2_vec_obj.encode( bed_region_set, pool="mean", diff --git a/bbconf/const.py b/bbconf/const.py index 8ef2cb5..8eba947 100644 --- a/bbconf/const.py +++ b/bbconf/const.py @@ -94,3 +94,4 @@ DEFAULT_HF_MODEL = "sentence-transformers/all-MiniLM-L12-v2" DEFAULT_VEC2VEC_MODEL = "databio/v2v-ChIP-atlas-hg38-ATAC" +DEFAULT_REGION2_VEC_MODEL = "databio/r2v-ChIP-atlas-hg38" diff --git a/tests/data/config.yaml b/tests/data/config.yaml index b6d7a65..3ae10f4 100644 --- a/tests/data/config.yaml +++ b/tests/data/config.yaml @@ -11,13 +11,18 @@ path: bedstat_dir: bedstat_output bedbuncher_dir: bedbuncher_output remote_url_base: null - region2vec: "add/path/here" - vec2vec: "add/path/here" server: host: 0.0.0.0 port: 8000 qdrant: - host: test_localhost + host: localhost port: 6333 - api_key: default_api_key - collection: bedbase \ No newline at end of file + api_key: None + collection: bedbase +remotes: + http: + prefix: https://data2.bedbase.org/ + description: HTTP compatible path + s3: + prefix: s3://data2.bedbase.org/ + description: S3 compatible path \ No newline at end of file diff --git a/tests/test_bbconf.py b/tests/test_bbconf.py index 3e53022..e3a135b 100644 --- a/tests/test_bbconf.py +++ b/tests/test_bbconf.py @@ -6,7 +6,7 @@ from bbconf import BedBaseConf, get_bedbase_cfg from bbconf.exceptions import * -from sqlmodel import SQLModel, create_engine +from sqlmodel import Session, SQLModel, create_engine from sqlmodel.main import default_registry