-
Notifications
You must be signed in to change notification settings - Fork 6
Closed
Description
Our entire receipt parsing system is currently down, so excuse the terse error report, but I will quickly post this here while I try to work around the issue on our side. I will come back and clean up this issue as soon as I have extinguished some fires on our end.
Here is a (really simplified) example of how we use the Mindee SDK:
from __future__ import annotations
from typing import BinaryIO
from mindee import Client
from mindee.product import InvoiceV4
from some_place import MINDEE_API_KEY
def parse(file: BinaryIO):
client = Client(api_key=MINDEE_API_KEY)
input_source = client.source_from_bytes(
input_bytes=file.read(),
filename=file.name,
)
file.seek(0)
return client.parse( # <-- This fails
product_class=InvoiceV4,
input_source=input_source,
)
Here is a screenshot that shows Mindee's JSON API response:
This response is not gracefully handled by the Python SDK client for mindee:
File ~/crdbrd/hub/src/hub/receipts/mindee.py:108, in parse(file, media_type)
103 raise exceptions.ReceiptParsingError from exc
105 file.seek(0)
106 parser_result = cast(
107 PredictResponse[InvoiceV4],
--> 108 client.parse( # pyright: ignore[reportUnknownMemberType]
109 product_class=InvoiceV4,
110 input_source=input_source,
111 ),
112 )
113 document = cast(
114 Document[InvoiceV4Document, Page[InvoiceV4Document]] | None,
115 parser_result.document,
116 )
117 if document is None:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:126, in Client.parse(self, product_class, input_source, include_words, close_file, page_options, cropper, endpoint, full_text)
120 if page_options and input_source.is_pdf():
121 input_source.process_pdf(
122 page_options.operation,
123 page_options.on_min_pages,
124 page_options.page_indexes,
125 )
--> 126 return self._make_request(
127 product_class,
128 input_source,
129 endpoint,
130 include_words,
131 close_file,
132 cropper,
133 full_text,
134 )
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:427, in Client._make_request(self, product_class, input_source, endpoint, include_words, close_file, cropper, full_text)
421 clean_response = clean_request_json(response)
422 raise handle_error(
423 str(product_class.endpoint_name),
424 clean_response,
425 )
--> 427 return PredictResponse(product_class, dict_response)
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/predict_response.py:28, in PredictResponse.__init__(self, inference_type, raw_response)
21 """
22 Container for the raw API response and the parsed document.
23
24 :param inference_type: Type of the inference.
25 :param raw_response: json response from HTTP call.
26 """
27 super().__init__(raw_response)
---> 28 self.document = Document(inference_type, raw_response["document"])
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:50, in Document.__init__(self, inference_type, raw_response)
48 if "extras" in raw_response and raw_response["inference"]["extras"]:
49 self.extras = Extras(raw_response["extras"])
---> 50 self._inject_full_text_ocr(raw_response)
51 self.inference = inference_type(raw_response["inference"])
52 self.n_pages = raw_response["n_pages"]
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:72, in Document._inject_full_text_ocr(self, raw_prediction)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
---> 72 full_text_content = "\n".join(
73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:73, in <genexpr>(.0)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
72 full_text_content = "\n".join(
---> 73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
TypeError: 'NoneType' object is not subscriptable
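Judging by the traceback, the `full_text_ocr` key is present in a page's `extras` but its value is `None`. Either these keys should not have been inserted in the response, or the client should check for `None` before processing the data.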
GribouilleVert and anael-megna
Metadata
Metadata
Assignees
Labels
No labels
