Skip to content

Commit 4296608

Browse files
restore function that was needlessly edited
1 parent 940a574 commit 4296608

File tree

4 files changed

+35
-41
lines changed

4 files changed

+35
-41
lines changed

mindee/extraction/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3+
attach_image_as_new_file,
34
extract_multiple_images_from_source,
45
)
56
from mindee.extraction.multi_receipts_extractor import multi_receipts_extractor

mindee/extraction/common/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3+
attach_image_as_new_file,
34
extract_multiple_images_from_source,
45
)

mindee/extraction/common/image_extractor.py

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,38 @@
1212
from mindee.input.sources.local_input_source import LocalInputSource
1313

1414

15+
def attach_image_as_new_file( # type: ignore
16+
input_buffer: BinaryIO,
17+
) -> pdfium.PdfDocument:
18+
"""
19+
Attaches an image as a new page in a PdfDocument object.
20+
21+
:param input_buffer: Input buffer.
22+
:return: A PdfDocument handle.
23+
"""
24+
# Create a new page in the PdfDocument
25+
input_buffer.seek(0)
26+
image = Image.open(input_buffer)
27+
image.convert("RGB")
28+
image_buffer = io.BytesIO()
29+
image.save(image_buffer, format="JPEG")
30+
31+
pdf = pdfium.PdfDocument.new()
32+
33+
image_pdf = pdfium.PdfImage.new(pdf)
34+
image_pdf.load_jpeg(image_buffer)
35+
width, height = image_pdf.get_size()
36+
37+
matrix = pdfium.PdfMatrix().scale(width, height)
38+
image_pdf.set_matrix(matrix)
39+
40+
page = pdf.new_page(width, height)
41+
page.insert_obj(image_pdf)
42+
page.gen_content()
43+
image.close()
44+
return pdf
45+
46+
1547
def extract_image_from_polygon(
1648
page_content: Image.Image,
1749
polygon: List[Point],
@@ -129,35 +161,4 @@ def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument: # type: ignore
129161
input_file.file_object.seek(0)
130162
return pdfium.PdfDocument(input_file.file_object.read())
131163

132-
return attach_images_as_new_file([input_file.file_object])
133-
134-
135-
def attach_images_as_new_file( # type: ignore
136-
input_buffer_list: List[BinaryIO],
137-
) -> pdfium.PdfDocument:
138-
"""
139-
Attaches a list of images as new pages in a PdfDocument object.
140-
141-
:param input_buffer_list: List of images, represented as buffers.
142-
:return: A PdfDocument handle.
143-
"""
144-
pdf = pdfium.PdfDocument.new()
145-
for input_buffer in input_buffer_list:
146-
input_buffer.seek(0)
147-
image = Image.open(input_buffer)
148-
image.convert("RGB")
149-
image_buffer = io.BytesIO()
150-
image.save(image_buffer, format="JPEG")
151-
152-
image_pdf = pdfium.PdfImage.new(pdf)
153-
image_pdf.load_jpeg(image_buffer)
154-
width, height = image_pdf.get_size()
155-
156-
matrix = pdfium.PdfMatrix().scale(width, height)
157-
image_pdf.set_matrix(matrix)
158-
159-
page = pdf.new_page(width, height)
160-
page.insert_obj(image_pdf)
161-
page.gen_content()
162-
image.close()
163-
return pdf
164+
return attach_image_as_new_file(input_file.file_object)

mindee/pdf/pdf_compressor.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,24 +219,15 @@ def collect_images_as_pdf(image_list: List[bytes]) -> pdfium.PdfDocument: # type: ignore
219219
:param image_list: A list of bytes representing JPEG images.
220220
:return: A PdfDocument handle containing the images as pages.
221221
"""
222-
# Create a new, empty PdfDocument
223222
out_pdf = pdfium.PdfDocument.new()
224223

225224
for image_bytes in image_list:
226-
# Load the JPEG image into a PdfImage object
227225
pdf_image = pdfium.PdfImage.new(out_pdf)
228226
pdf_image.load_jpeg(io.BytesIO(image_bytes))
229227

230-
# Get the dimensions of the image
231228
width, height = pdf_image.get_size()
232-
233-
# Create a new page in the PDF with the same dimensions as the image
234229
page = out_pdf.new_page(width, height)
235-
236-
# Place the image on the page
237230
page.insert_obj(pdf_image)
238-
239-
# Generate content for the page to finalize it
240231
page.gen_content()
241232

242233
return out_pdf

0 commit comments

Comments
 (0)