Skip to content

Commit 5b71c7a

Browse files
committed
improved reindexing batching
1 parent a6d5119 commit 5b71c7a

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

bbconf/modules/bedfiles.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,7 +1258,7 @@ def _sql_search_count(self, query: str) -> int:
12581258
count = session.execute(statement).one()
12591259
return count[0]
12601260

1261-
def reindex_qdrant(self, batch: int = 1000) -> None:
1261+
def reindex_qdrant(self, batch: int = 100) -> None:
12621262
"""
12631263
Re-upload all files to qdrant.
12641264
!Warning: only hg38 genome can be added to qdrant!
@@ -1271,7 +1271,9 @@ def reindex_qdrant(self, batch: int = 1000) -> None:
12711271
"""
12721272
bb_client = BBClient()
12731273

1274-
annotation_result = self.get_ids_list(limit=100000, genome=QDRANT_GENOME)
1274+
annotation_result = self.get_ids_list(
1275+
limit=100000, genome=QDRANT_GENOME, offset=0
1276+
)
12751277

12761278
if not annotation_result.results:
12771279
_LOGGER.error("No bed files found.")
@@ -1280,6 +1282,7 @@ def reindex_qdrant(self, batch: int = 1000) -> None:
12801282

12811283
with tqdm(total=len(results), position=0, leave=True) as pbar:
12821284
points_list = []
1285+
processed_number = 0
12831286
for record in results:
12841287
try:
12851288
bed_region_set_obj = GRegionSet(bb_client.seek(record.id))
@@ -1298,18 +1301,26 @@ def reindex_qdrant(self, batch: int = 1000) -> None:
12981301
),
12991302
)
13001303
)
1304+
processed_number += 1
1305+
if processed_number % batch == 0:
1306+
pbar.set_description(f"Uploading points to qdrant using batch...")
1307+
operation_info = self._config.qdrant_engine.qd_client.upsert(
1308+
collection_name=self._config.config.qdrant.file_collection,
1309+
points=points_list,
1310+
)
1311+
pbar.write("Uploaded batch to qdrant.")
1312+
points_list = []
1313+
assert operation_info.status == "completed"
1314+
13011315
pbar.write(f"File: {record.id} successfully indexed.")
13021316
pbar.update(1)
13031317

13041318
_LOGGER.info(f"Uploading points to qdrant using batches...")
1305-
for i in range(0, len(points_list), batch):
1306-
operation_info = self._config.qdrant_engine.qd_client.upsert(
1307-
collection_name=self._config.config.qdrant.file_collection,
1308-
points=points_list[i : i + batch],
1309-
)
1310-
1311-
assert operation_info.status == "completed"
1312-
1319+
operation_info = self._config.qdrant_engine.qd_client.upsert(
1320+
collection_name=self._config.config.qdrant.file_collection,
1321+
points=points_list,
1322+
)
1323+
assert operation_info.status == "completed"
13131324
return None
13141325

13151326
def delete_qdrant_point(self, identifier: str) -> None:

0 commit comments

Comments
 (0)