2 files changed: +13 -6 lines changed
Original file line number | Diff line number | Diff line change
1+ import tempfile
2+
3+ from docling .document_converter import DocumentConverter
4+ from docling_core .types .doc .document import ( # Assuming a compatible Docling library or module
5+ DoclingDocument ,
6+ )
7+
18from text_extract_api .extract .extract_result import ExtractResult
29from text_extract_api .extract .strategies .strategy import Strategy
310from text_extract_api .files .file_formats import FileFormat , PdfFileFormat
4- from docling_core .types .doc .document import DoclingDocument # Assuming a compatible Docling library or module
5- from docling .document_converter import DocumentConverter
6- import tempfile
11+
712
813class DoclingStrategy (Strategy ):
914 """
1015 Extraction strategy for processing PDF documents using Docling.
1116 """
1217
13- def name (self ) -> str :
18+ def name (self ) -> str :
1419 return "docling"
1520
16- def extract_text (self , file_format : FileFormat , language : str = 'en' ) -> ExtractResult :
21+ def extract_text (
22+ self , file_format : FileFormat , language : str = "en"
23+ ) -> ExtractResult :
1724 """
1825 Extracts text from a PDF file using Docling and returns an ExtractResult.
1926
@@ -62,6 +69,6 @@ def _save_to_temp_file(self, file_format: FileFormat) -> str:
6269 :param file_format: Instance of FileFormat.
6370 :return: Path to the temporary file containing the file content.
6471 """
65- with tempfile .NamedTemporaryFile (delete = False , suffix = '.pdf' ) as temp_file :
72+ with tempfile .NamedTemporaryFile (delete = False , suffix = ".pdf" ) as temp_file :
6673 temp_file .write (file_format .binary )
6774 return temp_file .name
You can’t perform that action at this time.
0 commit comments