|
9 | 9 |
|
10 | 10 | single_task_list = [ |
11 | 11 | 'Caption', 'Detailed Caption', 'More Detailed Caption', |
12 | | - 'OCR', 'OCR with Region', |
13 | | - 'Object Detection', |
14 | | - 'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding', |
15 | | - 'Referring Expression Segmentation', 'Region to Segmentation', |
16 | | - 'Open Vocabulary Detection', 'Region to Category', 'Region to Description', |
| 12 | + 'OCR', 'OCR with Region' |
17 | 13 | ] |
| 14 | +# single_task_list = [ |
| 15 | +# 'Caption', 'Detailed Caption', 'More Detailed Caption', |
| 16 | +# 'OCR', 'OCR with Region', |
| 17 | +# 'Object Detection', |
| 18 | +# 'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding', |
| 19 | +# 'Referring Expression Segmentation', 'Region to Segmentation', |
| 20 | +# 'Open Vocabulary Detection', 'Region to Category', 'Region to Description', |
| 21 | +# ] |
18 | 22 |
|
19 | 23 | header_markdown = """ |
20 | 24 |
|
@@ -181,11 +185,12 @@ def parse_document(input_file_path, parameters, request: gr.Request): |
181 | 185 |
|
182 | 186 | document_response = response.json() |
183 | 187 |
|
184 | | - # print(document_response) |
185 | | - images = document_response["images"] |
186 | | - pil_images = [decode_base64_to_pil(base64_str) for base64_str in images.values()] |
| 188 | + images = document_response.get('images', []) |
| 189 | + |
| 190 | + # Decode each base64-encoded image to a PIL image |
| 191 | + pil_images = [decode_base64_to_pil(image_dict['image']) for image_dict in images] |
187 | 192 |
|
188 | | - return str(document_response["markdown"]) , gr.Gallery(value=pil_images , visible=True) , str(document_response["markdown"]) , gr.JSON(value=document_response , visible=True) |
| 193 | + return str(document_response["text"]) , gr.Gallery(value=pil_images , visible=True) , str(document_response["text"]) , gr.JSON(value=document_response , visible=True) |
189 | 194 |
|
190 | 195 |
|
191 | 196 | except Exception as e: |
@@ -235,14 +240,17 @@ def process_image(input_file_path, parameters, request: gr.Request): |
235 | 240 |
|
236 | 241 |
|
237 | 242 | image_process_response = response.json() |
238 | | - print(image_process_response) |
| 243 | + |
| 244 | + images = image_process_response.get('images', []) |
| 245 | + # Decode each base64-encoded image to a PIL image |
| 246 | + pil_images = [decode_base64_to_pil(image_dict['image']) for image_dict in images] |
239 | 247 |
|
240 | 248 | # Decode the image if present in the response |
241 | 249 | # images = document_response.get('image', {}) |
242 | 250 | # pil_images = [decode_base64_to_pil(base64_str) for base64_str in images.values()] |
243 | 251 |
|
244 | | - return (gr.update(value=image_process_response["results"]), |
245 | | - gr.Gallery(visible=False), |
| 252 | + return (gr.update(value=image_process_response["text"]), |
| 253 | + gr.Gallery(value=pil_images, visible=(len(images) != 0)), |
246 | 254 | gr.JSON(value=image_process_response, visible=True)) |
247 | 255 |
|
248 | 256 | except Exception as e: |
@@ -283,12 +291,14 @@ def parse_image(input_file_path, parameters, request: gr.Request): |
283 | 291 | document_response = response.json() |
284 | 292 |
|
285 | 293 | # Decode the image if present in the response |
286 | | - images = document_response.get('image', {}) |
287 | | - pil_images = [decode_base64_to_pil(base64_str) for base64_str in images.values()] |
| 294 | + images = document_response.get('images', []) |
| 295 | + |
| 296 | + # Decode each base64-encoded image to a PIL image |
| 297 | + pil_images = [decode_base64_to_pil(image_dict['image']) for image_dict in images] |
288 | 298 |
|
289 | | - return (gr.update(value=document_response["markdown"]), |
| 299 | + return (gr.update(value=document_response["text"]), |
290 | 300 | gr.Gallery(value=pil_images, visible=True), |
291 | | - gr.update(value=document_response["markdown"]), |
| 301 | + gr.update(value=document_response["text"]), |
292 | 302 | gr.update(value=document_response, visible=True)) |
293 | 303 |
|
294 | 304 | except Exception as e: |
@@ -374,10 +384,15 @@ def parse_website(url, request: gr.Request): |
374 | 384 | base64_image = result.get("screenshot", "") |
375 | 385 |
|
376 | 386 | screenshot = [decode_base64_to_pil(base64_image)] if base64_image else [] |
| 387 | + |
| 388 | + images = website_response.get('images', []) |
| 389 | + |
| 390 | + # Decode each base64-encoded image to a PIL image |
| 391 | + pil_images = [decode_base64_to_pil(image_dict['image']) for image_dict in images] |
377 | 392 |
|
378 | 393 | return (gr.update(value=markdown, visible=True), |
379 | 394 | gr.update(value=html, visible=True), |
380 | | - gr.update(value=screenshot, visible=bool(screenshot)), |
| 395 | + gr.update(value=pil_images, visible=bool(screenshot)), |
381 | 396 | gr.JSON(value=website_response , visible=True)) |
382 | 397 |
|
383 | 398 | except requests.RequestException as e: |
@@ -422,7 +437,7 @@ def parse_website(url, request: gr.Request): |
422 | 437 | image_process_button = gr.Button("Process Image") |
423 | 438 | with gr.Column(scale=200): |
424 | 439 | image_process_output_text = gr.Textbox(label="Output Text") |
425 | | - image_process_output_image = gr.Image(label="Output Image ⌛" , interactive=False) |
| 440 | + image_process_output_image = gr.Gallery(label="Output Image ⌛" , interactive=False) |
426 | 441 | with gr.Accordion("JSON Output"): |
427 | 442 | image_process_json = gr.JSON(label="Output JSON", visible=False) |
428 | 443 | with gr.Accordion("Use API", open=True): |
|
0 commit comments