|
1 | 1 | import io |
2 | | -from typing import BinaryIO, List |
| 2 | +from typing import List |
3 | 3 |
|
4 | 4 | import pypdfium2 as pdfium |
5 | 5 | from PIL import Image |
|
8 | 8 | from mindee.extraction.common.extracted_image import ExtractedImage |
9 | 9 | from mindee.geometry.point import Point |
10 | 10 | from mindee.geometry.polygon import get_min_max_x, get_min_max_y |
11 | | -from mindee.input.sources import BytesInput, LocalInputSource |
12 | | - |
13 | | - |
14 | | -def attach_images_as_new_file( # type: ignore |
15 | | - input_buffer_list: List[BinaryIO], |
16 | | -) -> pdfium.PdfDocument: |
17 | | - """ |
18 | | - Attaches a list of images as new pages in a PdfDocument object. |
19 | | -
|
20 | | - :param input_buffer_list: List of images, represented as buffers. |
21 | | - :return: A PdfDocument handle. |
22 | | - """ |
23 | | - pdf = pdfium.PdfDocument.new() |
24 | | - for input_buffer in input_buffer_list: |
25 | | - input_buffer.seek(0) |
26 | | - image = Image.open(input_buffer) |
27 | | - image.convert("RGB") |
28 | | - image_buffer = io.BytesIO() |
29 | | - image.save(image_buffer, format="JPEG") |
30 | | - |
31 | | - image_pdf = pdfium.PdfImage.new(pdf) |
32 | | - image_pdf.load_jpeg(image_buffer) |
33 | | - width, height = image_pdf.get_size() |
34 | | - |
35 | | - matrix = pdfium.PdfMatrix().scale(width, height) |
36 | | - image_pdf.set_matrix(matrix) |
37 | | - |
38 | | - page = pdf.new_page(width, height) |
39 | | - page.insert_obj(image_pdf) |
40 | | - page.gen_content() |
41 | | - image.close() |
42 | | - return pdf |
| 11 | +from mindee.input.sources.bytes_input import BytesInput |
| 12 | +from mindee.input.sources.local_input_source import LocalInputSource |
| 13 | +from mindee.pdf.pdf_utils import attach_images_as_new_file |
43 | 14 |
|
44 | 15 |
|
45 | 16 | def extract_image_from_polygon( |
@@ -157,6 +128,6 @@ def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument: # type: i |
157 | 128 | """ |
158 | 129 | if input_file.is_pdf(): |
159 | 130 | input_file.file_object.seek(0) |
160 | | - return pdfium.PdfDocument(input_file.file_object) |
| 131 | + return pdfium.PdfDocument(input_file.file_object.read()) |
161 | 132 |
|
162 | 133 | return attach_images_as_new_file([input_file.file_object]) |
0 commit comments