[fix] readme fixes

pkarw · pkarw · commit 3947265401c6 · 2025-01-21T11:31:25.000+01:00
diff --git a/.env.example b/.env.example
@@ -3,6 +3,7 @@ REDIS_CACHE_URL=redis://redis:6379/1
 OLLAMA_HOST=http://ollama:11434
 STORAGE_PROFILE_PATH=./storage_profiles
 LLAMA_VISION_PROMPT="You are OCR. Convert image to markdown."
+REMOTE_API_URL=
 
 # CLI settings
 OCR_URL=http://localhost:8000/ocr/upload
@@ -15,3 +16,4 @@ LOAD_FILE_URL=http://localhost:8000/storage/load
 DELETE_FILE_URL=http://localhost:8000/storage/delete
 OCR_REQUEST_URL=http://localhost:8000/ocr/request
 OCR_UPLOAD_URL=http://localhost:8000/ocr/upload
+
diff --git a/.env.localhost.example b/.env.localhost.example
@@ -2,6 +2,7 @@
 REDIS_CACHE_URL=redis://localhost:6379/1
 LLAMA_VISION_PROMPT="You are OCR. Convert image to markdown."
 DISABLE_LOCAL_OLLAMA=0
+REMOTE_API_URL=
 
 # CLI settings
 OCR_URL=http://localhost:8000/ocr/upload
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ The API is built with FastAPI and uses Celery for asynchronous task processing.
 
 ## Features:
 - **No Cloud/external dependencies** all you need: PyTorch based OCR (EasyOCR) + Ollama are shipped and configured via `docker-compose` no data is sent outside your dev/server environment,
-- **PDF/Office to Markdown** conversion with very high accuracy using different OCR strategies including [llama3.2-vision](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/), [easyOCR](https://github.com/JaidedAI/EasyOCR), [minicpm-v](https://github.com/OpenBMB/MiniCPM-o?tab=readme-ov-file#minicpm-v-26), [marker-pdf](https://github.com/VikParuchuri/marker)
+- **PDF/Office to Markdown** conversion with very high accuracy using different OCR strategies including [llama3.2-vision](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/), [easyOCR](https://github.com/JaidedAI/EasyOCR), [minicpm-v](https://github.com/OpenBMB/MiniCPM-o?tab=readme-ov-file#minicpm-v-26), and remote URL strategies such as [marker-pdf](https://github.com/VikParuchuri/marker)
 - **PDF/Office to JSON** conversion using Ollama supported models (eg. LLama 3.1)
 - **LLM Improving OCR results** LLama is pretty good with fixing spelling and text issues in the OCR text
 - **Removing PII** This tool can be used for removing Personally Identifiable Information out of document - see `examples`
@@ -215,12 +215,20 @@ pip install -U uvicorn fastapi python-multipart
 marker_server --port 8002
 ```
 
-**Note: *** you might run `marker_server` on different port - then just make sure you export a proper env setting beffore starting off `text-extract-api` server:
+Set the Remote API URL:
+
+**Note:** you might run `marker_server` on a different port or server - then just make sure you export the proper env setting before starting the `text-extract-api` server:
 
 ```bash
 export REMOTE_API_URL=http://localhost:8002/marker/upload
 ```
 
+Run the `text-extract-api`:
+
+```bash
+make run
+```
+
 Please do use the `strategy=remote` CLI and URL parameters to use it. For example:
 
 ```bash
@@ -477,7 +485,7 @@ apiClient.uploadFile(formData).then(response => {
 - **Method**: POST
 - **Parameters**:
   - **file**: PDF, image or Office file to be processed.
-  - **strategy**: OCR strategy to use (`llama_vision`, `minicpm_v`, `marker` or `easyocr`). See the [available strategies](#text-extract-stratgies)
+  - **strategy**: OCR strategy to use (`llama_vision`, `minicpm_v`, `remote` or `easyocr`). See the [available strategies](#text-extract-stratgies)
   - **ocr_cache**: Whether to cache the OCR result (true or false).
   - **prompt**: When provided, will be used for Ollama processing the OCR result
   - **model**: When provided along with the prompt - this model will be used for LLM processing
@@ -496,7 +504,7 @@ curl -X POST -H "Content-Type: multipart/form-data" -F "file=@examples/example-m
 - **Method**: POST
 - **Parameters** (JSON body):
   - **file**: Base64 encoded PDF file content.
-  - **strategy**: OCR strategy to use (`llama_vision`, `minicpm_v`, marker or `easyocr`). See the [available strategies](#text-extract-stratgies)
+  - **strategy**: OCR strategy to use (`llama_vision`, `minicpm_v`, `remote` or `easyocr`). See the [available strategies](#text-extract-stratgies)
   - **ocr_cache**: Whether to cache the OCR result (true or false).
   - **prompt**: When provided, will be used for Ollama processing the OCR result.
   - **model**: When provided along with the prompt - this model will be used for LLM processing.
diff --git a/config/strategies.yaml b/config/strategies.yaml
@@ -6,4 +6,4 @@ strategies:
    easyocr:
       class: text_extract_api.extract.strategies.easyocr.EasyOCRStrategy
    remote:
-      class: text_extract_api.extract.strategies.marker.RemoteStrategy
+      class: text_extract_api.extract.strategies.remote.RemoteStrategy
diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml
@@ -19,6 +19,7 @@ services:
       - LOAD_FILE_URL=${LOAD_FILE_URL-http://localhost:8000/storage/load}
       - DELETE_FILE_URL=${DELETE_FILE_URL-http://localhost:8000/storage/delete}
       - LLAMA_VISION_PROMPT=${LLAMA_VISION_PROMPT-"You are OCR. Convert image to markdown."}
+      - REMOTE_API_URL=${REMOTE_API_URL}
     depends_on:
       - redis
       - ollama
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -19,6 +19,7 @@ services:
       - LOAD_FILE_URL=${LOAD_FILE_URL-http://localhost:8000/storage/load}
       - DELETE_FILE_URL=${DELETE_FILE_URL-http://localhost:8000/storage/delete}
       - LLAMA_VISION_PROMPT=${LLAMA_VISION_PROMPT-"You are OCR. Convert image to markdown."}      
+      - REMOTE_API_URL=${REMOTE_API_URL}
     depends_on:
       - redis
       - ollama
diff --git a/text_extract_api/extract/strategies/remote.py b/text_extract_api/extract/strategies/remote.py
@@ -16,7 +16,7 @@ class RemoteStrategy(Strategy):
 
     @classmethod
     def name(cls) -> str:
-        return "marker"
+        return "remote"
 
     def extract_text(self, file_format: FileFormat, language: str = 'en') -> ExtractResult:
 
@@ -40,7 +40,9 @@ def extract_text(self, file_format: FileFormat, language: str = 'en') -> Extract
             raise ValueError("No PDF file found - conversion error.")
 
         try: 
-            url = os.getenv("REMOTE_API_URL", "http://localhost:8002/marker/upload")
+            url = os.getenv("REMOTE_API_URL", "")
+            if not url:
+                raise Exception('Please do set the REMOTE_API_URL environment variable: export REMOTE_API_URL=http://...')
             files = {'file': ('document.pdf', pdf_files[0].binary, 'application/pdf')}
             data = {
                 'page_range': None,
@@ -64,6 +66,6 @@ def extract_text(self, file_format: FileFormat, language: str = 'en') -> Extract
             extracted_text = response.json().get('output', '')
         except Exception as e:
             print('Error:', e)
-            raise Exception("Failed to generate text with Marker PDF API. Make sure marker-pdf server is up and running: marker_server --port 8002. Details: https://github.com/VikParuchuri/marker")
+            raise Exception("Failed to generate text with Remote API. Make sure the remote server is up and running")
             
         return ExtractResult.from_text(extracted_text)