Skip to content

Commit

Permalink
Releasing IngestedImages from image ingestor too
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ansh5461 committed May 3, 2024
1 parent e895dba commit 975f7e2
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions querent/ingestors/images/image_ingestor.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
from typing import List, AsyncGenerator
import base64
import uuid
from querent.common.types.collected_bytes import CollectedBytes
from querent.common.types.ingested_images import IngestedImages
from querent.ingestors.base_ingestor import BaseIngestor
from querent.ingestors.ingestor_factory import IngestorFactory
from querent.processors.async_processor import AsyncProcessor
Expand Down Expand Up @@ -53,6 +56,7 @@ async def ingest(
CollectedBytes(file=current_file, data=collected_bytes)
)
yield IngestedTokens(file=current_file, data=[text], error=None, doc_source=chunk_bytes.doc_source)
yield IngestedImages(file=current_file, image=base64.b64encode(collected_bytes.data).decode('utf-8'), image_name=f"{str(uuid.uuid4())}.{chunk_bytes.extension}", page_num=0, text=[], ocr_text=[text], doc_source=chunk_bytes.doc_source)
yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source)

current_file = chunk_bytes.file
Expand All @@ -65,6 +69,7 @@ async def ingest(
CollectedBytes(file=current_file, data=collected_bytes)
)
yield IngestedTokens(file=current_file, data=[text], error=None, doc_source=chunk_bytes.doc_source)
yield IngestedImages(file=current_file, image=base64.b64encode(collected_bytes.data).decode('utf-8'), image_name=f"{str(uuid.uuid4())}.{chunk_bytes.extension}", page_num=0, text=[], ocr_text=[text], doc_source=chunk_bytes.doc_source)
yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source)

except Exception as e:
Expand Down

0 comments on commit 975f7e2

Please sign in to comment.