Skip to content

Commit 4b2242c

Browse files
style: formatted with black and isort
1 parent: a927a52 · commit: 4b2242c

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

examples/invoice.png

105 KB
Loading

text_extract_api/extract/strategies/docling.py

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,26 @@
1+
import tempfile
2+
3+
from docling.document_converter import DocumentConverter
4+
from docling_core.types.doc.document import ( # Assuming a compatible Docling library or module
5+
DoclingDocument,
6+
)
7+
18
from text_extract_api.extract.extract_result import ExtractResult
29
from text_extract_api.extract.strategies.strategy import Strategy
310
from text_extract_api.files.file_formats import FileFormat, PdfFileFormat
4-
from docling_core.types.doc.document import DoclingDocument # Assuming a compatible Docling library or module
5-
from docling.document_converter import DocumentConverter
6-
import tempfile
11+
712

813
class DoclingStrategy(Strategy):
914
"""
1015
Extraction strategy for processing PDF documents using Docling.
1116
"""
1217

13-
def name(self) -> str:
18+
def name(self) -> str:
1419
return "docling"
1520

16-
def extract_text(self, file_format: FileFormat, language: str = 'en') -> ExtractResult:
21+
def extract_text(
22+
self, file_format: FileFormat, language: str = "en"
23+
) -> ExtractResult:
1724
"""
1825
Extracts text from a PDF file using Docling and returns an ExtractResult.
1926
@@ -62,6 +69,6 @@ def _save_to_temp_file(self, file_format: FileFormat) -> str:
6269
:param file_format: Instance of FileFormat.
6370
:return: Path to the temporary file containing the file content.
6471
"""
65-
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
72+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
6673
temp_file.write(file_format.binary)
6774
return temp_file.name

0 commit comments

Comments
 (0)