4
4
from textwrap import indent
5
5
6
6
import yacman
7
- import pipestat
8
- from pipestat .exceptions import RecordNotFoundError
7
+ from pipestat import PipestatManager
8
+ from pipestat .exceptions import RecordNotFoundError , SchemaError
9
9
10
10
from sqlmodel import SQLModel , Field , select
11
11
import qdrant_client
17
17
CFG_PATH_KEY ,
18
18
CFG_PATH_PIPELINE_OUTPUT_KEY ,
19
19
CFG_PATH_BEDSTAT_DIR_KEY ,
20
+ CFG_PATH_SENTENCE2VEC_KEY ,
20
21
DEFAULT_SECTION_VALUES ,
21
22
CFG_PATH_BEDBUNCHER_DIR_KEY ,
22
23
BED_TABLE ,
34
35
CFG_QDRANT_API_KEY ,
35
36
CFG_QDRANT_HOST_KEY ,
36
37
CFG_QDRANT_COLLECTION_NAME_KEY ,
37
- DEFAULT_HF_MODEL ,
38
+ DEFAULT_SENTENCE2VEC_MODEL ,
38
39
DEFAULT_VEC2VEC_MODEL ,
39
40
DEFAULT_REGION2_VEC_MODEL ,
40
41
CFG_ACCESS_METHOD_KEY ,
49
50
from bbconf .helpers import raise_missing_key , get_bedbase_cfg
50
51
from bbconf .models import DRSModel , AccessMethod , AccessURL
51
52
52
- # os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # to suppress verbose warnings tensorflow
53
53
from geniml .text2bednn import text2bednn
54
54
from geniml .search import QdrantBackend
55
55
from fastembed .embedding import FlagEmbedding
@@ -85,12 +85,12 @@ def __init__(self, config_path: str = None, database_only: bool = False):
85
85
# Create Pipestat objects and tables if they do not exist
86
86
_LOGGER .debug ("Creating pipestat objects..." )
87
87
self .__pipestats = {
88
- BED_TABLE : pipestat . PipestatManager (
88
+ BED_TABLE : PipestatManager (
89
89
config_file = cfg_path ,
90
90
schema_path = BED_TABLE_SCHEMA ,
91
91
database_only = database_only ,
92
92
),
93
- BEDSET_TABLE : pipestat . PipestatManager (
93
+ BEDSET_TABLE : PipestatManager (
94
94
config_file = cfg_path ,
95
95
schema_path = BEDSET_TABLE_SCHEMA ,
96
96
database_only = database_only ,
@@ -102,6 +102,9 @@ def __init__(self, config_path: str = None, database_only: bool = False):
102
102
# Set up the t2bsi (Text2BEDSearchInterface) object
103
103
self ._t2bsi = None
104
104
try :
105
+ self ._senta2vec_hg_model_name = self .config [CFG_PATH_KEY ].get (
106
+ CFG_PATH_SENTENCE2VEC_KEY , DEFAULT_SENTENCE2VEC_MODEL
107
+ )
105
108
_LOGGER .debug ("Setting up qdrant database connection..." )
106
109
if self .config [CFG_QDRANT_KEY ].get (CFG_QDRANT_API_KEY , None ):
107
110
os .environ ["QDRANT_API_KEY" ] = self .config [CFG_QDRANT_KEY ].get (
@@ -125,7 +128,7 @@ def __init__(self, config_path: str = None, database_only: bool = False):
125
128
except qdrant_client .http .exceptions .ResponseHandlingException as err :
126
129
_LOGGER .error (f"error in Connection to qdrant! skipping... Error: { err } " )
127
130
128
- def _read_config_file (self , config_path : str ) -> yacman . YAMLConfigManager :
131
+ def _read_config_file (self , config_path : str ) -> dict :
129
132
"""
130
133
Read configuration file and insert default values if not set
131
134
@@ -218,7 +221,7 @@ def config(self) -> yacman.YAMLConfigManager:
218
221
return self ._config
219
222
220
223
@property
221
- def bed (self ) -> pipestat . PipestatManager :
224
+ def bed (self ) -> PipestatManager :
222
225
"""
223
226
PipestatManager of the bedfiles table
224
227
@@ -227,7 +230,7 @@ def bed(self) -> pipestat.PipestatManager:
227
230
return self .__pipestats [BED_TABLE ]
228
231
229
232
@property
230
- def bedset (self ) -> pipestat . PipestatManager :
233
+ def bedset (self ) -> PipestatManager :
231
234
"""
232
235
PipestatManager of the bedsets table
233
236
@@ -460,9 +463,7 @@ def select_unique(self, table_name: str, column: str = None) -> List[dict]:
460
463
with self .bedset .backend .session :
461
464
values = self .bedset .backend .select_records (columns = column )["records" ]
462
465
else :
463
- raise pipestat .exceptions .SchemaError (
464
- f"Incorrect table name provided { table_name } "
465
- )
466
+ raise SchemaError (f"Incorrect table name provided { table_name } " )
466
467
467
468
return [i for n , i in enumerate (values ) if i not in values [n + 1 :]]
468
469
@@ -510,9 +511,7 @@ def _create_t2bsi_object(self) -> Union[text2bednn.Text2BEDSearchInterface, None
510
511
511
512
try :
512
513
return text2bednn .Text2BEDSearchInterface (
513
- nl2vec_model = FlagEmbedding (
514
- model_name = os .getenv ("HF_MODEL" , DEFAULT_HF_MODEL )
515
- ),
514
+ nl2vec_model = FlagEmbedding (model_name = self ._senta2vec_hg_model_name ),
516
515
vec2vec_model = self ._config [CFG_PATH_KEY ][CFG_PATH_VEC2VEC_KEY ],
517
516
search_backend = self .qdrant_backend ,
518
517
)
@@ -654,7 +653,11 @@ def get_result(
654
653
return result
655
654
656
655
def get_drs_metadata (
657
- self , record_type : str , record_id : str , result_id : str , base_uri : str
656
+ self ,
657
+ record_type : Literal ["bed" , "bedset" ],
658
+ record_id : str ,
659
+ result_id : str ,
660
+ base_uri : str ,
658
661
) -> DRSModel :
659
662
"""
660
663
Get DRS metadata for a bed- or bedset-associated file
@@ -700,4 +703,3 @@ def get_drs_metadata(
700
703
)
701
704
702
705
return drs_dict
703
-
0 commit comments