22from celery_config import celery
33from ocr_strategies .marker import MarkerOCRStrategy
44from ocr_strategies .tesseract import TesseractOCRStrategy
5+ from ocr_strategies .llama_vision import LlamaVisionOCRStrategy
56import redis
67import os
78import ollama
89from storage_manager import StorageManager
910
# Registry of available OCR back-ends, keyed by the strategy name that
# callers pass in. Each strategy object is instantiated once, when this
# module is imported, and the same instance is reused for every lookup.
OCR_STRATEGIES = {
    'marker': MarkerOCRStrategy(),
    'tesseract': TesseractOCRStrategy(),
    'llama_vision': LlamaVisionOCRStrategy(),
}
1416
1517# Connect to Redis
@@ -23,17 +25,17 @@ def ocr_task(self, pdf_bytes, strategy_name, pdf_filename, pdf_hash, ocr_cache,
2325 """
2426 start_time = time .time ()
2527 if strategy_name not in OCR_STRATEGIES :
26- raise ValueError (f"Unknown strategy '{ strategy_name } '. Available: marker, tesseract" )
28+ raise ValueError (f"Unknown strategy '{ strategy_name } '. Available: marker, tesseract, llama_vision " )
2729
2830 ocr_strategy = OCR_STRATEGIES [strategy_name ]
2931 self .update_state (state = 'PROGRESS' , status = "File uploaded successfully" , meta = {'progress' : 10 }) # Example progress update
3032
3133 extracted_text = None
32- if ocr_cache :
33- cached_result = redis_client .get (pdf_hash )
34- if cached_result :
35- # Return cached result if available
36- extracted_text = cached_result .decode ('utf-8' )
34+ # if ocr_cache:
35+ # cached_result = redis_client.get(pdf_hash)
36+ # if cached_result:
37+ # # Return cached result if available
38+ # extracted_text = cached_result.decode('utf-8')
3739
3840 if extracted_text is None :
3941 print ("Extracting text from PDF..." )
0 commit comments