diff --git a/hugegraph-llm/.gitignore b/hugegraph-llm/.gitignore
index 2c4cd3e01..34891691f 100644
--- a/hugegraph-llm/.gitignore
+++ b/hugegraph-llm/.gitignore
@@ -3,3 +3,4 @@
 src/hugegraph_llm/resources/demo/questions.xlsx
 src/hugegraph_llm/resources/backup-graph-data-4020/
 uv.lock
+config.yaml
diff --git a/hugegraph-llm/pyproject.toml b/hugegraph-llm/pyproject.toml
index 8d2a83cf8..a8fff49a3 100644
--- a/hugegraph-llm/pyproject.toml
+++ b/hugegraph-llm/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
     "retry~=0.9.2",
     "tiktoken>=0.7.0",
     "nltk~=3.9.1",
-    "gradio>5.0.0",
+    "gradio>5.29.1",
     "jieba>=0.42.1",
     "numpy~=1.24.4",
     "python-docx~=1.1.2",
@@ -55,7 +55,8 @@ dependencies = [
     "rich~=13.9.4",
     "apscheduler~=3.10.4",
     "litellm~=1.61.13",
-    "hugegraph-python-client"
+    "hugegraph-python-client",
+    "OmegaConf~=2.3"
 ]
 [project.urls]
 homepage = "https://hugegraph.apache.org/"
diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt
index 5074a157e..c9999118a 100644
--- a/hugegraph-llm/requirements.txt
+++ b/hugegraph-llm/requirements.txt
@@ -18,3 +18,4 @@ openpyxl~=3.1.5
 pydantic-settings~=2.6.1
 apscheduler~=3.10.4
 litellm~=1.61.13
+OmegaConf~=2.3
diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_config.py
index dfe9d1056..8d98be94d 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/models/base_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_config.py
@@ -18,118 +18,149 @@
 
 import os
 
-from dotenv import dotenv_values, set_key
 from pydantic_settings import BaseSettings
+import yaml
 
 from hugegraph_llm.utils.log import log
 
 dir_name = os.path.dirname
-env_path = os.path.join(os.getcwd(), ".env")  # Load .env from the current working directory
+yaml_path = os.path.join(os.getcwd(), "config.yaml")
 
 
 class BaseConfig(BaseSettings):
     class Config:
-        env_file = env_path
+        yaml_file = yaml_path
         case_sensitive = False
-        extra = 'ignore'  # ignore extra fields to avoid ValidationError
+        extra = 'ignore'  # Ignore extra fields to avoid ValidationError
         env_ignore_empty = True
 
-    def generate_env(self):
-        if os.path.exists(env_path):
-            log.info("%s already exists, do you want to override with the default configuration? (y/n)", env_path)
-            update = input()
-            if update.lower() != "y":
-                return
-            self.update_env()
-        else:
-            config_dict = self.model_dump()
-            config_dict = {k.upper(): v for k, v in config_dict.items()}
-            with open(env_path, "w", encoding="utf-8") as f:
-                for k, v in config_dict.items():
-                    if v is None:
-                        f.write(f"{k}=\n")
-                    else:
-                        f.write(f"{k}={v}\n")
-            log.info("Generate %s successfully!", env_path)
-
-    def update_env(self):
+    def generate_yaml(self):
+        """Write config.yaml with a single section holding this class's current settings."""
         config_dict = self.model_dump()
         config_dict = {k.upper(): v for k, v in config_dict.items()}
-        env_config = dotenv_values(f"{env_path}")
-
-        # dotenv_values make None to '', while pydantic make None to None
-        # dotenv_values make integer to string, while pydantic make integer to integer
+        current_class_name = self.__class__.__name__
+        yaml_data = {}
+        yaml_section_data = {}
         for k, v in config_dict.items():
-            if k in env_config:
-                if not (env_config[k] or v):
-                    continue
-                if env_config[k] == str(v):
-                    continue
-                log.info("Update %s: %s=%s", env_path, k, v)
-                set_key(env_path, k, v if v else "", quote_mode="never")
-
-    def check_env(self):
-        """Synchronize configs between .env file and object.
+            yaml_section_data[k] = v
+        yaml_data[current_class_name] = yaml_section_data
+        with open(yaml_path, "w", encoding="utf-8") as f:
+            yaml.safe_dump(yaml_data, f, sort_keys=False)
+        log.info("Generate %s successfully!", yaml_path)
+
+    def update_configs(self):
+        """Persist this object's current values into its section of config.yaml.
+
+        Only keys that already exist in the section are overwritten. Failures are
+        logged instead of raised.
+        """
+        config_dict = self.model_dump()
+        config_dict = {k.upper(): v for k, v in config_dict.items()}
+
+        try:
+            current_class_name = self.__class__.__name__
+            with open(yaml_path, "r", encoding="utf-8") as f:
+                yaml_config = yaml.safe_load(f)
+            if not isinstance(yaml_config, dict):
+                yaml_config = {}  # An empty file loads as None; other non-mappings are unusable
+            section = yaml_config.get(current_class_name)
+            if not isinstance(section, dict):
+                # Avoid a KeyError/TypeError when the section is missing or malformed
+                log.error("Section %s not found in %s, nothing was updated", current_class_name, yaml_path)
+                return
+            for k, v in config_dict.items():
+                if k in section:
+                    section[k] = v
+            with open(yaml_path, "w", encoding="utf-8") as f:
+                yaml.safe_dump(yaml_config, f, sort_keys=False)
+        except yaml.YAMLError as e:
+            log.error("Error parsing YAML from %s: %s", yaml_path, e)
+        except Exception as e:
+            log.error("Error loading %s: %s", yaml_path, e)
 
-        This method performs two steps:
-        1. Updates object attributes from .env file values when they differ
-        2. Adds missing configuration items to the .env file
+    def check_yaml_configs(self):
+        """
+        Synchronize configs between config.yaml file and object.
+        Updates object attributes from config.yaml and adds missing items to config.yaml.
         """
+        object_config_dict = {k.upper(): v for k, v in self.model_dump().items()}
         try:
-            # Read the.env file and prepare object config
-            env_config = dotenv_values(env_path)
-            config_dict = {k.upper(): v for k, v in self.model_dump().items()}
+            current_class_name = self.__class__.__name__
+            # Read the config.yaml file and prepare object config
+            with open(yaml_path, "r", encoding="utf-8") as f:
+                content = f.read()
+                if not content.strip():
+                    yaml_file_config = {current_class_name: {}}
+                else:
+                    yaml_file_config = yaml.safe_load(content)
+                    if not isinstance(yaml_file_config, dict):
+                        log.error("Invalid YAML content in %s. Expected a dictionary.", yaml_path)
+                        yaml_file_config = {current_class_name: {}}  # Reset to a safe state
+                    elif current_class_name not in yaml_file_config:
+                        yaml_file_config[current_class_name] = {}
+
+            # Step 1: Update the object from config.yaml
+            if yaml_file_config.get(current_class_name):
+                self._sync_yaml_to_object(yaml_file_config, object_config_dict)
+
+            # Step 2: Add missing config items from object to config.yaml
+            # Re-fetch object_config_after_sync as _sync_yaml_to_object might have changed it
+            object_config_after_sync = {k.upper(): v for k, v in self.model_dump().items()}
+            self._sync_object_to_yaml(yaml_file_config, object_config_after_sync)
 
-            # Step 1: Update the object from .env when values differ
-            self._sync_env_to_object(env_config, config_dict)
-            # Step 2: Add missing config items to .env
-            self._sync_object_to_env(env_config, config_dict)
         except Exception as e:
-            log.error("An error occurred when checking the .env variable file: %s", str(e))
+            log.error("An error occurred when checking the config.yaml file: %s", str(e))
             raise
 
-    def _sync_env_to_object(self, env_config, config_dict):
-        """Update object attributes from .env file values when they differ."""
-        for env_key, env_value in env_config.items():
-            if env_key in config_dict:
-                obj_value = config_dict[env_key]
-                obj_value_str = str(obj_value) if obj_value is not None else ""
-
-                if env_value != obj_value_str:
-                    log.info("Update configuration from the file: %s=%s (Original value: %s)",
-                             env_key, env_value, obj_value_str)
+    def _sync_yaml_to_object(self, yaml_file_config, object_config):
+        """Update object attributes from config.yaml values when they differ."""
+        current_class_name = self.__class__.__name__
+        if not isinstance(yaml_file_config.get(current_class_name), dict):
+            return
+
+        for obj_key, obj_value in object_config.items():
+            if obj_key in yaml_file_config[current_class_name]:
+                yaml_value = yaml_file_config[current_class_name][obj_key]
+
+                if obj_value != yaml_value:
+                    log.info("Update configuration from YAML file: %s=%s (Original value: %s)",
+                             obj_key, yaml_value, obj_value)
                     # Update the object attribute (using lowercase key)
-                    setattr(self, env_key.lower(), env_value)
-
-    def _sync_object_to_env(self, env_config, config_dict):
-        """Add missing configuration items to the .env file."""
-        for obj_key, obj_value in config_dict.items():
-            if obj_key not in env_config:
-                obj_value_str = str(obj_value) if obj_value is not None else ""
-                log.info("Add configuration items to the environment variable file: %s=%s",
-                         obj_key, obj_value)
-                # Add to .env
-                set_key(env_path, obj_key, obj_value_str, quote_mode="never")
+                    setattr(self, obj_key.lower(), yaml_value)
+
+    def _sync_object_to_yaml(self, yaml_file_config, object_config):
+        """Add missing or changed configuration items to the config.yaml file."""
+        current_class_name = self.__class__.__name__
+
+        if not isinstance(yaml_file_config.get(current_class_name), dict):
+            yaml_file_config[current_class_name] = {}  # Ensure the section exists
+
+        for obj_key, obj_value in object_config.items():
+            if obj_key not in yaml_file_config[current_class_name] or \
+                    yaml_file_config[current_class_name][obj_key] != obj_value:
+                log.info("Add/Update configuration item in YAML structure for %s: %s=%s",
+                         current_class_name, obj_key, obj_value)
+                # Add to config.yaml
+                yaml_file_config[current_class_name][obj_key] = obj_value
+        with open(yaml_path, "w", encoding="utf-8") as f:
+            yaml.safe_dump(yaml_file_config, f, sort_keys=False)
+        return yaml_file_config
 
     def __init__(self, **data):
         try:
-            file_exists = os.path.exists(env_path)
-            # Step 1: Load environment variables if file exists
-            if file_exists:
-                env_config = dotenv_values(env_path)
-                for k, v in env_config.items():
-                    os.environ[k] = v
+            yaml_file_exists = os.path.exists(yaml_path)
 
-            # Step 2: Init the parent class with loaded environment variables
+            # Initialize the parent class with loaded environment variables
             super().__init__(**data)
-            # Step 3: Handle environment file operations after initialization
-            if not file_exists:
-                self.generate_env()
+
+            # Handle environment file operations after initialization
+            if not yaml_file_exists:
+                self.generate_yaml()
             else:
-                # Synchronize configurations between the object and .env file
-                self.check_env()
-
-            log.info("The %s file was loaded. Class: %s", env_path, self.__class__.__name__)
+                # Synchronize configurations between the object and yaml file
+                self.check_yaml_configs()
+
+            log.info("The %s file was loaded. Class: %s", yaml_path, self.__class__.__name__)
         except Exception as e:
             log.error("An error occurred when initializing the configuration object: %s", str(e))
             raise
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
index cb3677709..51c644116 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
@@ -137,7 +137,7 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int:
         llm_settings.litellm_embedding_api_base = arg2
         llm_settings.litellm_embedding_model = arg3
         status_code = test_litellm_embedding(arg1, arg2, arg3)
-    llm_settings.update_env()
+    llm_settings.update_configs()
     gr.Info("Configured!")
     return status_code
 
@@ -175,7 +175,7 @@ def apply_reranker_config(
             headers=headers,
             origin_call=origin_call,
         )
-    llm_settings.update_env()
+    llm_settings.update_configs()
     gr.Info("Configured!")
     return status_code
 
@@ -198,7 +198,7 @@ def apply_graph_config(url, name, user, pwd, gs, origin_call=None) -> int:
     auth = HTTPBasicAuth(user, pwd)
     # for http api return status
     response = test_api_connection(test_url, auth=auth, origin_call=origin_call)
-    huge_settings.update_env()
+    huge_settings.update_configs()
     return response
 
 
@@ -243,7 +243,7 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=Non
         status_code = test_litellm_chat(arg1, arg2, arg3, int(arg4))
 
     gr.Info("Configured!")
-    llm_settings.update_env()
+    llm_settings.update_configs()
     return status_code
 
 