Commit fbd31e1

Merge pull request #48 from CatchTheTornado/fix_46
fix(#46) - fixed the way new ollama handles images
2 parents 49c50ed + 832748c commit fbd31e1

1 file changed: +10 -2 lines changed


app/ocr_strategies/llama_vision.py

Lines changed: 10 additions & 2 deletions
@@ -1,4 +1,5 @@
 import base64
+import tempfile
 from ocr_strategies.ocr_strategy import OCRStrategy
 import ollama
 import io
@@ -20,14 +21,21 @@ def extract_text_from_pdf(self, pdf_bytes):
             # Convert image to base64
             buffered = io.BytesIO()
             image.save(buffered, format="JPEG")
-            img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+            #img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+            # Save image to a temporary file and get its path
+            temp_filename = None
+            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
+                image.save(temp_file, format="JPEG")
+                temp_filename = temp_file.name
 
             # Generate text using the Llama 3.2 Vision model
             try:
                 response = ollama.chat("llama3.2-vision", [{
+                    'role': 'user',
                     'content': os.getenv('LLAMA_VISION_PROMPT', "You are OCR. Convert image to markdown."),
-                    'images': [img_str]
+                    'images': [temp_filename]
                 }], stream=True)
+                os.remove(temp_filename)
                 num_chunk = 1
                 for chunk in response:
                     self.update_state_callback(state='PROGRESS', meta={'progress': str(30 + ocr_percent_done), 'status': 'OCR Processing (page ' + str(i+1) + ' of ' + str(num_pages) +') chunk no: ' + str(num_chunk), 'start_time': start_time, 'elapsed_time': time.time() - start_time}) # Example progress update
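For anyone who wants to try the new image-handling path outside this repo, here is a minimal sketch of the same pattern: write the page image to a temporary JPEG and pass its file path in the message's 'images' field instead of a base64 string. It assumes Pillow and the ollama Python client are installed and that a llama3.2-vision model has been pulled locally; the ocr_image helper and its default prompt are illustrative, not part of this codebase.

import os
import tempfile

import ollama
from PIL import Image


def ocr_image(image: Image.Image, prompt: str = "You are OCR. Convert image to markdown.") -> str:
    # Write the PIL image to a temporary JPEG so the ollama client can read it from disk
    # (newer ollama clients accept file paths in 'images'; older ones expected base64 strings).
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
        image.save(temp_file, format="JPEG")
        temp_filename = temp_file.name

    text = ""
    try:
        # Stream the model's markdown output chunk by chunk.
        for chunk in ollama.chat(
            "llama3.2-vision",
            [{"role": "user", "content": prompt, "images": [temp_filename]}],
            stream=True,
        ):
            text += chunk["message"]["content"]
    finally:
        os.remove(temp_filename)
    return text

Unlike the patch above, which deletes the temporary file right after ollama.chat() returns the stream, this sketch removes it only once the stream has been consumed, a slightly more conservative ordering in case the client reads the file lazily.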
