Skip to content

Commit e866e84

Browse files
temporary (not working) version
1 parent 56ca783 commit e866e84

33 files changed

+387
-98
lines changed

examples/auto_invoice_splitter_extraction_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from mindee import Client
22
from mindee.extraction.pdf_extractor import PdfExtractor
3-
from mindee.input.sources import PathInput
3+
from mindee.input.sources.path_input import PathInput
44
from mindee.product.invoice.invoice_v4 import InvoiceV4
55
from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1
66

mindee/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from mindee.client import Client, Endpoint
88
from mindee.error.mindee_error import MindeeClientError
99
from mindee.input.page_options import PageOptions
10-
from mindee.input.sources import LocalInputSource, UrlInputSource
10+
from mindee.input.sources.local_input_source import LocalInputSource
11+
from mindee.input.sources.url_input_source import UrlInputSource
1112
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
1213
from mindee.parsing.common.document import Document, serialize_for_json
1314
from mindee.parsing.common.feedback_response import FeedbackResponse

mindee/client.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@
77
from mindee.input import WorkflowOptions
88
from mindee.input.local_response import LocalResponse
99
from mindee.input.page_options import PageOptions
10-
from mindee.input.sources import (
11-
Base64Input,
12-
BytesInput,
13-
FileInput,
14-
LocalInputSource,
15-
PathInput,
16-
UrlInputSource,
17-
)
10+
from mindee.input.sources.base_64_input import Base64Input
11+
from mindee.input.sources.bytes_input import BytesInput
12+
from mindee.input.sources.file_input import FileInput
13+
from mindee.input.sources.local_input_source import LocalInputSource
14+
from mindee.input.sources.path_input import PathInput
15+
from mindee.input.sources.url_input_source import UrlInputSource
1816
from mindee.logger import logger
1917
from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint
2018
from mindee.mindee_http.mindee_api import MindeeApi

mindee/extraction/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3-
attach_images_as_new_file,
43
extract_multiple_images_from_source,
54
)
65
from mindee.extraction.multi_receipts_extractor import multi_receipts_extractor
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3-
attach_images_as_new_file,
43
extract_multiple_images_from_source,
54
)

mindee/extraction/common/extracted_image.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from PIL import Image
66

77
from mindee.error.mindee_error import MindeeError
8-
from mindee.input.sources import FileInput, LocalInputSource
8+
from mindee.input.sources.file_input import FileInput
9+
from mindee.input.sources.local_input_source import LocalInputSource
910
from mindee.logger import logger
1011

1112

mindee/extraction/common/image_extractor.py

Lines changed: 5 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import io
2-
from typing import BinaryIO, List
2+
from typing import List
33

44
import pypdfium2 as pdfium
55
from PIL import Image
@@ -8,38 +8,9 @@
88
from mindee.extraction.common.extracted_image import ExtractedImage
99
from mindee.geometry.point import Point
1010
from mindee.geometry.polygon import get_min_max_x, get_min_max_y
11-
from mindee.input.sources import BytesInput, LocalInputSource
12-
13-
14-
def attach_images_as_new_file( # type: ignore
15-
input_buffer_list: List[BinaryIO],
16-
) -> pdfium.PdfDocument:
17-
"""
18-
Attaches a list of images as new pages in a PdfDocument object.
19-
20-
:param input_buffer_list: List of images, represented as buffers.
21-
:return: A PdfDocument handle.
22-
"""
23-
pdf = pdfium.PdfDocument.new()
24-
for input_buffer in input_buffer_list:
25-
input_buffer.seek(0)
26-
image = Image.open(input_buffer)
27-
image.convert("RGB")
28-
image_buffer = io.BytesIO()
29-
image.save(image_buffer, format="JPEG")
30-
31-
image_pdf = pdfium.PdfImage.new(pdf)
32-
image_pdf.load_jpeg(image_buffer)
33-
width, height = image_pdf.get_size()
34-
35-
matrix = pdfium.PdfMatrix().scale(width, height)
36-
image_pdf.set_matrix(matrix)
37-
38-
page = pdf.new_page(width, height)
39-
page.insert_obj(image_pdf)
40-
page.gen_content()
41-
image.close()
42-
return pdf
11+
from mindee.input.sources.bytes_input import BytesInput
12+
from mindee.input.sources.local_input_source import LocalInputSource
13+
from mindee.pdf.pdf_utils import attach_images_as_new_file
4314

4415

4516
def extract_image_from_polygon(
@@ -157,6 +128,6 @@ def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument: # type: i
157128
"""
158129
if input_file.is_pdf():
159130
input_file.file_object.seek(0)
160-
return pdfium.PdfDocument(input_file.file_object)
131+
return pdfium.PdfDocument(input_file.file_object.read())
161132

162133
return attach_images_as_new_file([input_file.file_object])

mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from mindee.extraction.common.image_extractor import (
66
extract_multiple_images_from_source,
77
)
8-
from mindee.input.sources import LocalInputSource
8+
from mindee.input.sources.local_input_source import LocalInputSource
99
from mindee.parsing.common.inference import Inference
1010

1111

mindee/extraction/pdf_extractor/extracted_pdf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pypdfium2 as pdfium
55

66
from mindee.error.mindee_error import MindeeError
7-
from mindee.input.sources import BytesInput
7+
from mindee.input.sources.bytes_input import BytesInput
88

99

1010
class ExtractedPdf:

mindee/extraction/pdf_extractor/pdf_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from mindee.error.mindee_error import MindeeError
99
from mindee.extraction.pdf_extractor.extracted_pdf import ExtractedPdf
10-
from mindee.input.sources import LocalInputSource
10+
from mindee.input.sources.local_input_source import LocalInputSource
1111
from mindee.product.invoice_splitter.invoice_splitter_v1_page_group import (
1212
InvoiceSplitterV1PageGroup,
1313
)

0 commit comments

Comments
 (0)