from typing import Any, Callable, Optional
2+
3+ """
4+ IMPORTANT INFORMATION ABOUT THIS CLASS:
5+
6+ This is not the final version of the object, namespace, or intended use.
7+
8+ For this reason, I am not creating an interface, etc. Add code here as soon as possible
9+ along with further integrations, and once we have gained sufficient experience, we will
10+ undertake a refactor.
11+
12+ Currently, the object's purpose is to replace the use of a primitive type, a string, for
13+ extract returns. The limitation of this approach became evident when returning only the
14+ resulting string caused us to lose valuable metadata about the document. Thanks to this
15+ class, we retain DoclingDocument and foresee that other converters/OCRs may have similar
16+ metadata.
17+ """
class ExtractResult:
    """Result of an extraction: the raw value plus a way to get its text.

    The instance keeps the original ``value`` (which may carry metadata beyond
    plain text) together with a ``text_gatherer`` callable that turns that
    value into a string on demand via the :attr:`text` property.
    """

    def __init__(
        self,
        value: Any,
        text_gatherer: Optional[Callable[[Any], str]] = None,
    ):
        """
        Initializes an ExtractResult instance.

        Args:
            value (Any): The object containing or representing the text.
            text_gatherer (Callable[[Any], str], optional): A callable that extracts text
                from `value`. Defaults to `_default_text_gatherer`, which only accepts strings.

        Raises:
            ValueError: If `text_gatherer` is given but not callable, or if it is
                omitted while `value` is not a string.

        Examples:
            Using the default text gatherer

            >>> unified = ExtractResult("Example text")
            >>> print(unified.text)
            Example text

            Using a custom text gatherer

            >>> def custom_gatherer(value): return f"Custom: {value}"
            >>> unified = ExtractResult(123, custom_gatherer)
            >>> print(unified.text)
            Custom: 123
        """
        if text_gatherer is not None and not callable(text_gatherer):
            raise ValueError("The `text_gatherer` provided to ExtractResult must be a callable.")

        # From here on `text_gatherer` is either None or a callable.
        if text_gatherer is None and not isinstance(value, str):
            raise ValueError("If `value` is not a string, `text_gatherer` must be provided.")

        self.value = value
        # Compare against None explicitly: a callable object may be falsy
        # (e.g. it defines `__len__` returning 0) and must not be replaced
        # by the string-only default.
        if text_gatherer is None:
            self.text_gatherer = self._default_text_gatherer
        else:
            self.text_gatherer = text_gatherer

    @staticmethod
    def from_text(value: str) -> 'ExtractResult':
        """
        Builds an ExtractResult from a plain string using the default text gatherer.

        Args:
            value (str): The text to wrap.

        Returns:
            ExtractResult: A new instance holding `value`.
        """
        return ExtractResult(value)

    @property
    def text(self) -> str:
        """
        Retrieves text using the text gatherer.

        The gatherer is invoked on every access; the result is not cached.

        Returns:
            str: The extracted text from `value`.
        """
        return self.text_gatherer(self.value)

    @staticmethod
    def _default_text_gatherer(value: Any) -> str:
        """
        Default gatherer: returns `value` unchanged when it is already a string.

        Args:
            value (Any): The input value.

        Returns:
            str: The input value itself.

        Raises:
            TypeError: If the `value` is not a string.
        """
        if isinstance(value, str):
            return value
        raise TypeError("Default text gatherer only supports strings.")
0 commit comments