Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions mindee/parsing/v2/inference_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,22 @@

from mindee.parsing.common.string_dict import StringDict
from mindee.parsing.v2.field.inference_result_fields import InferenceResultFields
from mindee.parsing.v2.inference_result_options import InferenceResultOptions
from mindee.parsing.v2.raw_text import RawText


class InferenceResult:
    """Inference result info."""

    fields: InferenceResultFields
    """Fields contained in the inference."""
    options: Optional[InferenceResultOptions]
    """Potential options retrieved alongside the inference."""
    raw_text: Optional[RawText] = None
    """Raw text of the document, set only when the response has a ``raw_text`` entry."""

    def __init__(self, raw_response: StringDict) -> None:
        """
        Build the result from a raw response dictionary.

        :param raw_response: Dictionary with a mandatory ``fields`` entry and
            optional ``options`` / ``raw_text`` entries.
        :raises KeyError: If ``fields`` is missing from ``raw_response``.
        """
        self.fields = InferenceResultFields(raw_response["fields"])
        # A missing key, ``None`` and an empty payload are all treated as
        # "not provided": the attribute then stays ``None``.
        self.options = (
            InferenceResultOptions(raw_response["options"])
            if raw_response.get("options")
            else None
        )
        if raw_response.get("raw_text"):
            self.raw_text = RawText(raw_response["raw_text"])

    def __str__(self) -> str:
        """Render the fields section, followed by the options section when set."""
        out_str = f"\n\nFields\n======{self.fields}"
        if self.options:
            out_str += f"\n\nOptions\n====={self.options}"
        return out_str
14 changes: 0 additions & 14 deletions mindee/parsing/v2/inference_result_options.py

This file was deleted.

6 changes: 3 additions & 3 deletions mindee/parsing/v2/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,6 @@ def __init__(self, raw_response: StringDict) -> None:
self.filename = raw_response["filename"]
self.result_url = raw_response["result_url"]
self.alias = raw_response["alias"]
self.webhooks = []
for webhook in raw_response["webhooks"]:
self.webhooks.append(JobWebhook(webhook))
self.webhooks = [
JobWebhook(webhook) for webhook in raw_response.get("webhooks", [])
]
10 changes: 5 additions & 5 deletions mindee/parsing/v2/raw_text.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from typing import List, Optional

from mindee.parsing.common.string_dict import StringDict
from mindee.parsing.v2.raw_text_page import RawTextPage


class RawText:
    """Raw text extracted from the document."""

    page: Optional[int]
    """Page the raw text was found on, or ``None`` when the payload has no ``page`` key."""
    pages: List[RawTextPage]
    """Raw text of each page, in the order given by the payload."""
    content: Optional[str]
    """Content of the raw text, or ``None`` when the payload has no ``content`` key."""

    def __init__(self, raw_response: StringDict):
        """
        Build the raw text from a raw response dictionary.

        :param raw_response: Dictionary that may hold ``page``, ``content`` and
            ``pages`` entries; every entry is optional.
        """
        # ``page`` / ``content`` are read leniently so that a payload carrying
        # only ``pages`` does not raise ``KeyError``.
        self.page = raw_response.get("page")
        self.content = raw_response.get("content")
        self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])]
11 changes: 11 additions & 0 deletions mindee/parsing/v2/raw_text_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from mindee.parsing.common.string_dict import StringDict


class RawTextPage:
    """Raw text extracted from a single page."""

    content: str
    """Text content of the page."""

    def __init__(self, raw_response: StringDict):
        """
        Build the page from a raw response dictionary.

        :param raw_response: Dictionary holding a mandatory ``content`` entry.
        :raises KeyError: If ``content`` is missing from ``raw_response``.
        """
        page_content = raw_response["content"]
        self.content = page_content
10 changes: 5 additions & 5 deletions tests/test_client_v2_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from mindee import ClientV2, InferenceParameters
from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
from mindee.parsing.v2.inference_response import InferenceResponse
from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR
Expand Down Expand Up @@ -39,7 +39,7 @@ def test_parse_file_empty_multiple_pages_must_succeed(
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
assert input_path.exists(), f"sample file missing: {input_path}"

input_doc = v2_client.source_from_path(input_path)
input_doc = PathInput(input_path)
options = InferenceParameters(findoc_model_id)

response: InferenceResponse = v2_client.enqueue_and_get_inference(
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_parse_file_filled_single_page_must_succeed(
input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg"
assert input_path.exists(), f"sample file missing: {input_path}"

input_doc = v2_client.source_from_path(input_path)
input_doc = PathInput(input_path)
options = InferenceParameters(findoc_model_id)

response: InferenceResponse = v2_client.enqueue_and_get_inference(
Expand Down Expand Up @@ -98,7 +98,7 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None:
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
assert input_path.exists()

input_doc = v2_client.source_from_path(input_path)
input_doc = PathInput(input_path)
options = InferenceParameters("INVALID MODEL ID")

with pytest.raises(MindeeHTTPErrorV2) as exc_info:
Expand All @@ -119,7 +119,7 @@ def test_url_input_source_must_not_raise_errors(
"""
url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL")

input_doc = v2_client.source_from_url(url)
input_doc = UrlInputSource(url)
options = InferenceParameters(findoc_model_id)
response: InferenceResponse = v2_client.enqueue_and_get_inference(
input_doc, options
Expand Down
73 changes: 36 additions & 37 deletions tests/v2/test_inference_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,62 +41,60 @@ def test_deep_nested_fields():
inference_result = InferenceResponse(json_sample)
assert isinstance(inference_result.inference, Inference)
assert isinstance(
inference_result.inference.result.fields.field_simple, SimpleField
inference_result.inference.result.fields["field_simple"], SimpleField
)
assert isinstance(
inference_result.inference.result.fields.field_object, ObjectField
inference_result.inference.result.fields["field_object"], ObjectField
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields["sub_object_list"],
inference_result.inference.result.fields["field_object"].fields[
"sub_object_list"
],
ListField,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
inference_result.inference.result.fields["field_object"].fields[
"sub_object_object"
],
ObjectField,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
].fields,
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields,
dict,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
].fields["sub_object_object_sub_object_list"],
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields["sub_object_object_sub_object_list"],
ListField,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
]
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields["sub_object_object_sub_object_list"]
.items,
list,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
]
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields["sub_object_object_sub_object_list"]
.items[0],
ObjectField,
)
assert isinstance(
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
]
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields["sub_object_object_sub_object_list"]
.items[0]
.fields["sub_object_object_sub_object_list_simple"],
SimpleField,
)
assert (
inference_result.inference.result.fields.field_object.fields[
"sub_object_object"
]
inference_result.inference.result.fields["field_object"]
.fields["sub_object_object"]
.fields["sub_object_object_sub_object_list"]
.items[0]
.fields["sub_object_object_sub_object_list_simple"]
Expand All @@ -110,30 +108,32 @@ def test_standard_field_types():
json_sample, rst_sample = _get_inference_samples("standard_field_types")
inference_result = InferenceResponse(json_sample)
assert isinstance(inference_result.inference, Inference)
field_simple_string = inference_result.inference.result.fields.field_simple_string
field_simple_string = inference_result.inference.result.fields[
"field_simple_string"
]
assert isinstance(field_simple_string, SimpleField)
assert field_simple_string.value == "field_simple_string-value"
assert field_simple_string.confidence == FieldConfidence.CERTAIN
assert str(field_simple_string) == "field_simple_string-value"

field_simple_bool = inference_result.inference.result.fields.field_simple_bool
field_simple_bool = inference_result.inference.result.fields["field_simple_bool"]
assert isinstance(field_simple_bool, SimpleField)
assert field_simple_bool.value is True
assert str(field_simple_bool) == "True"

field_simple_null = inference_result.inference.result.fields.field_simple_null
field_simple_null = inference_result.inference.result.fields["field_simple_null"]
assert isinstance(field_simple_null, SimpleField)
assert field_simple_null.value is None
assert str(field_simple_null) == ""

assert isinstance(
inference_result.inference.result.fields.field_object, ObjectField
inference_result.inference.result.fields["field_object"], ObjectField
)
assert isinstance(
inference_result.inference.result.fields.field_simple_list, ListField
inference_result.inference.result.fields["field_simple_list"], ListField
)
assert isinstance(
inference_result.inference.result.fields.field_object_list, ListField
inference_result.inference.result.fields["field_object_list"], ListField
)
assert rst_sample == str(inference_result)

Expand All @@ -144,11 +144,10 @@ def test_raw_texts():
inference_result = InferenceResponse(json_sample)
assert isinstance(inference_result.inference, Inference)

assert inference_result.inference.result.options
assert len(inference_result.inference.result.options.raw_texts) == 2
assert inference_result.inference.result.options.raw_texts[0].page == 0
assert inference_result.inference.result.raw_text
assert len(inference_result.inference.result.raw_text.pages) == 2
assert (
inference_result.inference.result.options.raw_texts[0].content
inference_result.inference.result.raw_text.pages[0].content
== "This is the raw text of the first page..."
)

Expand All @@ -161,13 +160,13 @@ def test_full_inference_response():
assert isinstance(inference_result.inference, Inference)
assert inference_result.inference.id == "12345678-1234-1234-1234-123456789abc"
assert isinstance(inference_result.inference.result.fields.date, SimpleField)
assert inference_result.inference.result.fields.date.value == "2019-11-02"
assert inference_result.inference.result.fields["date"].value == "2019-11-02"
assert isinstance(inference_result.inference.result.fields.taxes, ListField)
assert isinstance(
inference_result.inference.result.fields.taxes.items[0], ObjectField
inference_result.inference.result.fields["taxes"].items[0], ObjectField
)
assert (
inference_result.inference.result.fields.customer_address.fields.city.value
inference_result.inference.result.fields["customer_address"].fields.city.value
== "New York"
)
assert (
Expand All @@ -183,7 +182,7 @@ def test_full_inference_response():
assert inference_result.inference.file.page_count == 1
assert inference_result.inference.file.mime_type == "image/jpeg"
assert not inference_result.inference.file.alias
assert not inference_result.inference.result.options
assert not inference_result.inference.result.raw_text


@pytest.mark.v2
Expand All @@ -198,7 +197,7 @@ def test_field_locations_and_confidence() -> None:

inference_result = InferenceResponse(json_sample)

date_field: SimpleField = inference_result.inference.result.fields.date
date_field: SimpleField = inference_result.inference.result.fields["date"]

assert date_field.locations, "date field should expose locations"
loc0 = date_field.locations[0]
Expand Down