From 975f7e219ea41d56234806d0c2886883b0b619ca Mon Sep 17 00:00:00 2001 From: Ansh5461 Date: Fri, 3 May 2024 16:14:47 +0530 Subject: [PATCH] Releasing IngestedImages from image ingestor too --- querent/ingestors/images/image_ingestor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/querent/ingestors/images/image_ingestor.py b/querent/ingestors/images/image_ingestor.py index 29d8d519..5c68e65d 100644 --- a/querent/ingestors/images/image_ingestor.py +++ b/querent/ingestors/images/image_ingestor.py @@ -1,5 +1,8 @@ from typing import List, AsyncGenerator +import base64 +import uuid from querent.common.types.collected_bytes import CollectedBytes +from querent.common.types.ingested_images import IngestedImages from querent.ingestors.base_ingestor import BaseIngestor from querent.ingestors.ingestor_factory import IngestorFactory from querent.processors.async_processor import AsyncProcessor @@ -53,6 +56,7 @@ async def ingest( CollectedBytes(file=current_file, data=collected_bytes) ) yield IngestedTokens(file=current_file, data=[text], error=None, doc_source=chunk_bytes.doc_source) + yield IngestedImages(file=current_file, image=base64.b64encode(collected_bytes.data).decode('utf-8'), image_name=f"{str(uuid.uuid4())}.{chunk_bytes.extension}", page_num=0, text=[], ocr_text=[text], doc_source=chunk_bytes.doc_source) yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source) current_file = chunk_bytes.file @@ -65,6 +69,7 @@ async def ingest( CollectedBytes(file=current_file, data=collected_bytes) ) yield IngestedTokens(file=current_file, data=[text], error=None, doc_source=chunk_bytes.doc_source) + yield IngestedImages(file=current_file, image=base64.b64encode(collected_bytes.data).decode('utf-8'), image_name=f"{str(uuid.uuid4())}.{chunk_bytes.extension}", page_num=0, text=[], ocr_text=[text], doc_source=chunk_bytes.doc_source) yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source) except Exception as e: