Commit 1efd508

[feat] model pulling added to the docker entrypoint
1 parent: ac960ce

File tree

2 files changed: +23 -9 lines changed


client/cli.py

Lines changed: 9 additions & 7 deletions
@@ -3,6 +3,8 @@
 import requests
 import time
 import os
+import math
+from ollama import pull
 
 def ocr_upload(file_path, ocr_cache, prompt, prompt_file=None, model='llama3.1', strategy='llama_vision', storage_profile='default', storage_filename=None):
     ocr_url = os.getenv('OCR_UPLOAD_URL', 'http://localhost:8000/ocr/upload')
@@ -114,13 +116,13 @@ def clear_cache():
     else:
         print(f"Failed to clear OCR cache: {response.text}")
 
-def llm_pull(model = 'llama3.1'):
-    ollama_pull_url = os.getenv('LLM_PULL_API_URL', 'http://localhost:8000/llm/pull')
-    response = requests.post(ollama_pull_url, json={"model": model})
-    if response.status_code == 200:
-        print("Model pulled successfully.")
-    else:
-        print(f"Failed to pull the model: {response.text}")
+def llm_pull(model = 'llama3.1'):
+    response = pull(model, stream=True)
+    for chunk in response:
+        if chunk.completed and chunk.total:
+            print(f'Pulling {model} - {chunk.status} - {math.floor((chunk.completed / chunk.total) * 100)}% completed')
+        else:
+            print(f'Pulling {model} - {chunk.status}')
 
 def llm_generate(prompt, model = 'llama3.1'):
     ollama_url = os.getenv('LLM_GENERATE_API_URL', 'http://localhost:8000/llm/generate')
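
With this change, llm_pull no longer goes through the LLM_PULL_API_URL endpoint; it streams pull progress directly from the local Ollama daemon via the ollama Python client. A rough usage sketch follows; the percentage and status strings are illustrative, since the exact values come from the Ollama daemon:

python client/cli.py llm_pull --model llama3.1
# Illustrative output while the pull streams:
#   Pulling llama3.1 - pulling manifest
#   Pulling llama3.1 - pulling <layer digest> - 42% completed
#   Pulling llama3.1 - success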

scripts/entrypoint.sh

Lines changed: 14 additions & 2 deletions
@@ -22,9 +22,21 @@ if [ "$APP_TYPE" = "celery" ]; then
     exec celery -A text_extract_api.celery_app worker --loglevel=info --pool=solo
 else
     echo "Starting FastAPI app..."
+    pid=''
     if [ "$APP_ENV" = "production" ]; then
-        exec uvicorn text_extract_api.main:app --host 0.0.0.0 --port 8000
+        exec uvicorn text_extract_api.main:app --host 0.0.0.0 --port 8000 &
+        pid=$!
     else
-        exec uvicorn text_extract_api.main:app --host 0.0.0.0 --port 8000 --reload
+        exec uvicorn text_extract_api.main:app --host 0.0.0.0 --port 8000 --reload &
+        pid=$!
     fi
+
+    sleep 5
+
+    echo "Pulling LLM models, please wait until this process is done..."
+    exec python client/cli.py llm_pull --model llama3.1
+    exec python client/cli.py llm_pull --model llama3.2-vision
+    echo "LLM models are ready!"
+
+    wait $pid
 fi
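
The new flow starts uvicorn in the background, records its PID, pulls the required models through the CLI, and then waits on the server process so the container stays up. A minimal standalone sketch of that start-then-pull pattern, assuming plain python invocations (an exec'd command replaces the shell, so nothing after it would run) and keeping the fixed 5-second warm-up:

#!/bin/sh
# Sketch of the start-then-pull pattern. Model names and the warm-up delay
# are taken from the entrypoint above; the use of plain (non-exec) calls is
# an assumption of this sketch.
uvicorn text_extract_api.main:app --host 0.0.0.0 --port 8000 &
pid=$!

sleep 5   # give the API a moment to come up before pulling models

echo "Pulling LLM models, please wait until this process is done..."
python client/cli.py llm_pull --model llama3.1
python client/cli.py llm_pull --model llama3.2-vision
echo "LLM models are ready!"

wait $pid  # keep the entrypoint attached to the server until it exits

The trailing wait is what keeps the container alive once the pulls finish: the entrypoint stays attached to the uvicorn process and exits when the server does.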
