File tree Expand file tree Collapse file tree 3 files changed +8
-10
lines changed
Expand file tree Collapse file tree 3 files changed +8
-10
lines changed Original file line number Diff line number Diff line change @@ -11,8 +11,8 @@ strategies:
1111 class : text_extract_api.extract.strategies.easyocr.EasyOCRStrategy
1212 docling :
1313 class : text_extract_api.extract.strategies.docling.DoclingStrategy
14-
15- # remote strategy example:
16- # remote:
17- # class: text_extract_api.extract.strategies.remote.RemoteStrategy
18- # url:
14+ model : llama3.1
15+ prompt : You are OCR. Convert image to markdown. Return only the markdown with no explanation text. Do not exclude any content from the page.
16+ remote :
17+ class : text_extract_api.extract.strategies.remote.RemoteStrategy
18+ url :
Original file line number Diff line number Diff line change @@ -48,7 +48,7 @@ def text_gatherer(self, docling_document: DoclingDocument) -> str:
4848
4949 def _convert_to_docling (self , file_path : str ) -> DoclingDocument :
5050 """
51- Converts a PDF file into a DoclingDocument instance.
51+ Converts a file into a DoclingDocument instance.
5252
5353 :param file_path: Path to the file to be converted.
5454 :return: DoclingDocument instance.
Original file line number Diff line number Diff line change @@ -9,20 +9,18 @@ class DoclingFileFormat(FileFormat):
99 @staticmethod
1010 def accepted_mime_types () -> list [str ]:
1111 return [
12- "application/pdf" , # PDF documents
1312 "application/vnd.docling" , # Docling documents
13+ # Do not put all formats handled by docling here - only those that are not supported by dedicated file formats
1414 "text/plain" ,
1515 "text/markdown" ,
16- "text/html" , # HTML documents
16+ "text/html" ,
1717 "application/msword" ,
1818 "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ,
1919 "application/vnd.oasis.opendocument.text" ,
2020 "application/vnd.ms-excel" ,
2121 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ,
2222 "application/vnd.ms-powerpoint" ,
2323 "application/vnd.openxmlformats-officedocument.presentationml.presentation" ,
24- "image/jpeg" ,
25- "image/png" ,
2624 "text/csv" ,
2725 "application/json" ,
2826 "application/xml" ,
You can’t perform that action at this time.
0 commit comments