Project-MONAI
diff --git a/.gitignore
Lines changed: 2 additions & 0 deletions b/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/THIRD_PARTY_NOTICES.txt
Lines changed: 19 additions & 0 deletions b/THIRD_PARTY_NOTICES.txt
Lines changed: 19 additions & 0 deletions
diff --git a/agents/annotation_agent.py
Lines changed: 122 additions & 0 deletions b/agents/annotation_agent.py
Lines changed: 122 additions & 0 deletions
diff --git a/agents/base_agent.py
Lines changed: 203 additions & 0 deletions b/agents/base_agent.py
Lines changed: 203 additions & 0 deletions
@@ -0,0 +1,2 @@
+models/*
+!models/.gitignore
@@ -0,0 +1,19 @@
+------------------------------------------------------------
+Bootswatch v5.3.1 (https://bootswatch.com)
+Theme: darkly_green
+Copyright 2012-2023 Thomas Park
+Licensed under MIT
+Based on Bootstrap
+------------------------------------------------------------
+Bootstrap v5.3.1 (https://getbootstrap.com/)
+Copyright 2011-2023 The Bootstrap Authors
+Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)
+------------------------------------------------------------
+Bootstrap Bundle v5.3.1 (https://getbootstrap.com/)
+Copyright 2011-2023 The Bootstrap Authors
+Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)
+------------------------------------------------------------
+jQuery v3.6.3
+Copyright (c) OpenJS Foundation and other contributors
+Licensed under the jQuery license (jquery.org/license)
+------------------------------------------------------------
@@ -0,0 +1,122 @@
+"""
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" 
+
+import threading
+import time
+import logging
+import os
+import json
+import queue
+from typing import List
+from pydantic import BaseModel
+from .base_agent import Agent
+
+# Pydantic schema for one annotation of a surgical scene.
+# AnnotationAgent._generate_annotation validates the model's raw JSON reply
+# against this class and then converts the result to a plain dict.
+class SurgeryAnnotation(BaseModel):
+    # Both time fields are required for validation, but the values sent by the
+    # model are overwritten by AnnotationAgent._generate_annotation with the
+    # local wall-clock time and the seconds elapsed since the agent started.
+    timestamp: str
+    elapsed_time_seconds: float
+    # Free-form string lists; presumably the instruments and anatomical
+    # structures visible in the frame -- TODO confirm against the agent prompt.
+    tools: List[str]
+    anatomy: List[str]
+    # Free-form strings; no enumeration of allowed values is enforced here.
+    surgical_phase: str
+    description: str
+
+class AnnotationAgent(Agent):
+    def __init__(self, settings_path, response_handler, frame_queue, agent_key=None, procedure_start_str=None):
+        super().__init__(settings_path, response_handler, agent_key=agent_key)
+        self._logger = logging.getLogger(__name__)
+        self.frame_queue = frame_queue  
+        self.time_step = self.agent_settings.get("time_step_seconds", 10)
+
+        if procedure_start_str is None:
+            procedure_start_str = time.strftime("%Y_%m_%d__%H_%M_%S", time.localtime())
+        self.procedure_start_str = procedure_start_str
+        self.procedure_start = time.time()
+
+
+        base_output_dir = self.agent_settings.get("annotation_output_dir", "procedure_outputs")
+        subfolder = os.path.join(base_output_dir, f"procedure_{self.procedure_start_str}")
+        os.makedirs(subfolder, exist_ok=True)
+
+        self.annotation_filepath = os.path.join(subfolder, "annotation.json")
+        self._logger.info(f"AnnotationAgent writing annotations to: {self.annotation_filepath}")
+
+        self.annotations = []
+        self.stop_event = threading.Event()
+
+        # Start the background loop in a separate thread.
+        self.thread = threading.Thread(target=self._background_loop, daemon=True)
+        self.thread.start()
+        self._logger.info(f"AnnotationAgent background thread started (interval={self.time_step}s).")
+
+    def _background_loop(self):
+        while not self.stop_event.is_set():
+            try:
+                # Attempt to get image data from the frame queue.
+                try:
+                    frame_data = self.frame_queue.get_nowait()
+                except queue.Empty:
+                    self._logger.debug("No image data available; skipping annotation generation.")
+                    time.sleep(self.time_step)
+                    continue
+
+                annotation = self._generate_annotation(frame_data)
+                if annotation:
+                    self.annotations.append(annotation)
+                    self.append_json_to_file(annotation, self.annotation_filepath)
+                    self._logger.debug(f"New annotation appended: {annotation}")
+            except Exception as e:
+                self._logger.error(f"Error generating annotation: {e}", exc_info=True)
+            time.sleep(self.time_step)
+
+    def _generate_annotation(self, frame_data):
+        messages = []
+        if self.agent_prompt:
+            messages.append({"role": "system", "content": self.agent_prompt})
+        user_content = "Please produce an annotation of the surgical scene based on the provided image, following the required schema."
+        messages.append({"role": "user", "content": user_content})
+        try:
+            guided_params = {"guided_json": json.loads(self.grammar)}
+            raw_json_str = self.stream_image_response(
+                prompt=self.generate_prompt(user_content, []),
+                image_b64=frame_data,
+                temperature=0.3,
+                extra_body=guided_params
+            )
+            self._logger.debug(f"Raw annotation response: {raw_json_str}")
+
+            try:
+                parsed = SurgeryAnnotation.model_validate_json(raw_json_str)
+            except Exception as e:
+                self._logger.warning(f"Annotation parse error: {e}")
+                return None
+
+            annotation_dict = parsed.dict()
+            timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+            annotation_dict["timestamp"] = timestamp_str
+            annotation_dict["elapsed_time_seconds"] = time.time() - self.procedure_start
+
+            return annotation_dict
+
+        except Exception as e:
+            self._logger.warning(f"Annotation generation error: {e}")
+            return None
+
+    def process_request(self, input_data, chat_history):
+        return {
+            "name": "AnnotationAgent",
+            "response": "AnnotationAgent runs in the background and generates annotations only when image data is available."
+        }
+
+    def stop(self):
+        self.stop_event.set()
+        self._logger.info("Stopping AnnotationAgent background thread.")
+        self.thread.join()
@@ -0,0 +1,203 @@
+"""
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" 
+
+from abc import ABC, abstractmethod
+import json
+import logging
+import yaml
+import time
+import tiktoken
+from threading import Lock
+import base64
+import tempfile
+import os
+import requests
+from openai import OpenAI
+
+class Agent(ABC):
+    _llm_lock = Lock()
+    
+    def __init__(self, settings_path, response_handler, agent_key=None):
+        self._logger = logging.getLogger(f"{__name__}.{type(self).__name__}")        
+        self.load_settings(settings_path, agent_key=agent_key)
+        self.response_handler = response_handler
+        self.tokenizer = tiktoken.get_encoding("cl100k_base")
+        self.client = OpenAI(api_key="EMPTY", base_url=self.llm_url)
+        self._wait_for_server()
+
+    def load_settings(self, settings_path, agent_key=None):
+        with open(settings_path, 'r') as f:
+            full_config = yaml.safe_load(f)
+        if agent_key and agent_key in full_config:
+            self.agent_settings = full_config[agent_key]
+        else:
+            self.agent_settings = full_config
+        self.description = self.agent_settings.get('description', '')
+        self.max_prompt_tokens = self.agent_settings.get('max_prompt_tokens', 3000)
+        self.ctx_length = self.agent_settings.get('ctx_length', 2048)
+        self.agent_prompt = self.agent_settings.get('agent_prompt', '').strip()
+        self.user_prefix = self.agent_settings.get('user_prefix', '')
+        self.bot_prefix = self.agent_settings.get('bot_prefix', '')
+        self.bot_rule_prefix = self.agent_settings.get('bot_rule_prefix', '')
+        self.end_token = self.agent_settings.get('end_token', '')
+        self.grammar = self.agent_settings.get('grammar', None)
+        self.model_name = self.agent_settings.get('model_name', 'llama3.2')
+        self.publish_settings = self.agent_settings.get('publish', {})
+        self.llm_url = self.agent_settings.get('llm_url', "http://localhost:8000/v1")
+        self.tools = self.agent_settings.get('tools', {})
+        self._logger.debug(f"Agent config loaded. llm_url={self.llm_url}, model_name={self.model_name}")
+
+    def _wait_for_server(self, timeout=30):
+        attempts = 0
+        check_url = f"{self.llm_url}/models"
+        while attempts < timeout:
+            try:
+                r = requests.get(check_url)
+                if r.status_code == 200:
+                    self._logger.debug(f"Connected to vLLM server at {self.llm_url}")
+                    return
+            except Exception as e:
+                self._logger.debug(f"Waiting for vLLM server (attempt {attempts+1}): {e}")
+            time.sleep(1)
+            attempts += 1
+        raise ConnectionError(f"Unable to connect to vLLM server at {self.llm_url} after {timeout} seconds")
+
+    def stream_response(self, prompt, grammar=None, temperature=0.0, display_output=True):
+        with Agent._llm_lock:
+            user_message = prompt.split("<|im_start|>user\n")[-1].split("<|im_end|>")[0].strip()
+            request_messages = []
+            if self.agent_prompt:
+                request_messages.append({"role": "system", "content": self.agent_prompt})
+            request_messages.append({"role": "user", "content": user_message})
+            self._logger.debug(
+                f"Sending chat request to vLLM/OpenAI client. Model={self.model_name}, temperature={temperature}\nUser message:\n{user_message[:500]}"
+            )
+            try:
+                completion = self.client.chat.completions.create(
+                    model=self.model_name,
+                    messages=request_messages,
+                    temperature=temperature,
+                    max_tokens=self.ctx_length
+                )
+                response_text = completion.choices[0].message.content if completion.choices else ""
+                if display_output and self.response_handler:
+                    self.response_handler.add_response(response_text)
+                    self.response_handler.end_response()
+                return response_text
+            except Exception as e:
+                self._logger.error(f"vLLM chat request failed: {e}", exc_info=True)
+                return ""
+
+    def stream_image_response(self, prompt, image_b64, grammar=None, temperature=0.0, display_output=True, extra_body=None):
+        self._logger.debug(f"stream_image_response with model={self.model_name}")
+        if not image_b64:
+            raise ValueError("No image data provided for image response")
+        user_message = prompt.split("<|im_start|>user\n")[-1].split("<|im_end|>")[0].strip()
+        try:
+            raw_b64 = self._extract_raw_base64(image_b64)
+            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+                file_path = tmp_file.name
+                tmp_file.write(base64.b64decode(raw_b64))
+            self._logger.debug(f"Temp image file created: {file_path}")
+            messages = []
+            if self.agent_prompt:
+                messages.append({"role": "system", "content": self.agent_prompt})
+            messages.append({
+                "role": "user",
+                "content": user_message,
+                "images": [file_path]
+            })
+            request_kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": temperature,
+                "max_tokens": self.ctx_length
+            }
+            if extra_body is not None:
+                request_kwargs["extra_body"] = extra_body
+            result = self.client.chat.completions.create(**request_kwargs)
+            raw_text = result.choices[0].message.content
+            if display_output and self.response_handler:
+                self.response_handler.add_response(raw_text)
+                self.response_handler.end_response()
+            os.remove(file_path)
+            return raw_text
+        except Exception as e:
+            self._logger.error(f"vLLM vision request failed: {e}", exc_info=True)
+            raise
+
+    def _extract_raw_base64(self, image_b64: str) -> str:
+        prefix = "data:image/"
+        if image_b64.startswith(prefix):
+            parts = image_b64.split(',', 1)
+            if len(parts) == 2:
+                return parts[1]
+            else:
+                return image_b64
+        else:
+            return image_b64
+
+    def generate_prompt(self, text, chat_history):
+        system_prompt = f"{self.bot_rule_prefix}\n{self.agent_prompt}\n{self.end_token}"
+        user_prompt = f"\n{self.user_prefix}\n{text}\n{self.end_token}"
+        token_usage = self.calculate_token_usage(system_prompt + user_prompt)
+        chat_prompt = self.create_conversation_str(chat_history, token_usage)
+        prompt = system_prompt + chat_prompt + user_prompt
+        prompt += f"\n{self.bot_prefix}\n"
+        return prompt
+
+    def create_conversation_str(self, chat_history, token_usage, conversation_length=2):
+        total_tokens = token_usage
+        msg_hist = []
+        for user_msg, bot_msg in chat_history[:-1][-conversation_length:][::-1]:
+            if bot_msg:
+                bot_msg_str = f"\n{self.bot_prefix}\n{bot_msg}\n{self.end_token}"
+                bot_tokens = self.calculate_token_usage(bot_msg_str)
+                if total_tokens + bot_tokens > self.max_prompt_tokens:
+                    break
+                total_tokens += bot_tokens
+                msg_hist.append(bot_msg_str)
+            if user_msg:
+                user_msg_str = f"\n{self.user_prefix}\n{user_msg}\n{self.end_token}"
+                user_tokens = self.calculate_token_usage(user_msg_str)
+                if total_tokens + user_tokens > self.max_prompt_tokens:
+                    break
+                total_tokens += user_tokens
+                msg_hist.append(user_msg_str)
+        return "".join(msg_hist[::-1])
+
+    def calculate_token_usage(self, text):
+        return len(self.tokenizer.encode(text))
+
+    @abstractmethod
+    def process_request(self, input_data, chat_history):
+        pass
+
+    def append_json_to_file(self, json_object, file_path):
+        try:
+            if not os.path.isfile(file_path):
+                with open(file_path, 'w') as f:
+                    json.dump([json_object], f, indent=2)
+            else:
+                with open(file_path, 'r') as f:
+                    try:
+                        data = json.load(f)
+                    except json.JSONDecodeError:
+                        data = []
+                if not isinstance(data, list):
+                    data = []
+                data.append(json_object)
+                with open(file_path, 'w') as f:
+                    json.dump(data, f, indent=2)
+        except Exception as e:
+            self._logger.error(f"append_json_to_file error: {e}", exc_info=True)