parse image responseDocument compatible - Adithya S K

adithya-s-k · adithya-s-k · commit f8ea62907b0d · 2024-06-30T13:02:00.000Z
diff --git a/omniparse/__init__.py b/omniparse/__init__.py
@@ -23,7 +23,7 @@ def load_omnimodel(load_documents: bool, load_media: bool, load_web: bool):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     if load_documents:
         print("[LOG] ✅ Loading OCR Model")
-        # shared_state.model_list = load_all_models()
+        shared_state.model_list = load_all_models()
         print("[LOG] ✅ Loading Vision Model")
         # if device == "cuda":
         shared_state.vision_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device)
diff --git a/omniparse/image/router.py b/omniparse/image/router.py
@@ -11,8 +11,8 @@
 async def parse_image_endpoint(file: UploadFile = File(...)):
     try:
         file_bytes = await file.read()
-        result = parse_image(file_bytes, model_state)
-        return JSONResponse(content=result)
+        result : responseDocument = parse_image(file_bytes, model_state)
+        return JSONResponse(content=result.model_dump())
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
diff --git a/omniparse/utils.py b/omniparse/utils.py
@@ -4,7 +4,6 @@
 from omniparse.models import responseDocument
 
 def encode_images(images, inputDocument:responseDocument):
-    image_data = []
     for i, (filename, image) in enumerate(images.items()):
         # print(f"Processing image {filename}")
         # Save image as PNG
@@ -14,13 +13,11 @@ def encode_images(images, inputDocument:responseDocument):
             image_bytes = f.read()
         # Convert image to base64
         image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        image_data[f'{filename}'] = image_base64
         
         inputDocument.add_image(image_name=filename,image_data=image_base64)
         
         # Remove the temporary image file
         os.remove(filename)
-    return image_data
 
 
 def print_omniparse_text_art(suffix=None):