From df23e9d1ccb10cbd2e84745df7108ef67fd37599 Mon Sep 17 00:00:00 2001
From: "Hsekumsti@gmail.com" <Hsekumsti@gmail.com>
Date: Fri, 28 Nov 2025 07:11:41 +0530
Subject: [PATCH 1/2] Fix Qdrant cloud indexing: Add keyword index for 'type'
 field during collection creation

---
 src/intugle/core/semantic_search/crud.py | 10 +++++++++-
 src/intugle/core/vector_store/qdrant.py  |  2 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/intugle/core/semantic_search/crud.py b/src/intugle/core/semantic_search/crud.py
index c2236a5..6bd26a0 100644
--- a/src/intugle/core/semantic_search/crud.py
+++ b/src/intugle/core/semantic_search/crud.py
@@ -50,7 +50,15 @@ def configuration(self):
             }
 
             embeddings_configurations = {**embeddings_configurations, **config}
 
-        configuration = QdrantVectorConfiguration(vectors_config=embeddings_configurations)
+        # Payload schema with keyword index for "type" field required for filtering
+        payload_schema = {
+            "type": models.PayloadSchemaType.KEYWORD,
+        }
+
+        configuration = QdrantVectorConfiguration(
+            vectors_config=embeddings_configurations,
+            payload_schema=payload_schema
+        )
 
         return configuration
diff --git a/src/intugle/core/vector_store/qdrant.py b/src/intugle/core/vector_store/qdrant.py
index f83301e..3d6eb55 100644
--- a/src/intugle/core/vector_store/qdrant.py
+++ b/src/intugle/core/vector_store/qdrant.py
@@ -21,6 +21,8 @@ class QdrantVectorConfiguration(BaseModel):
 
     sparse_vectors_config: Optional[Mapping[str, qdrant_types.SparseVectorParams]] = None
 
+    payload_schema: Optional[Mapping[str, models.PayloadSchemaType]] = None
+
 
 
 # Used for standardization
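
Note on PATCH 1/2: Qdrant Cloud can reject query filters on payload fields
that have no explicit payload index, so the "type" field gets a KEYWORD
index when the collection is created. A minimal sketch of the same idea
against the qdrant-client API directly (the URL, collection name, vector
size, and the "column" filter value are illustrative, not taken from the
patch):

    from qdrant_client import QdrantClient, models

    # Placeholder Qdrant Cloud credentials.
    client = QdrantClient(url="https://<cluster-url>", api_key="<api-key>")

    # Keyword index on the "type" payload field; this is what the new
    # payload_schema entry declares at collection-creation time.
    client.create_payload_index(
        collection_name="semantic_search",  # illustrative name
        field_name="type",
        field_schema=models.PayloadSchemaType.KEYWORD,
    )

    # With the index in place, filtered vector search on "type" works.
    hits = client.search(
        collection_name="semantic_search",
        query_vector=[0.1] * 384,  # illustrative embedding size
        query_filter=models.Filter(
            must=[models.FieldCondition(key="type", match=models.MatchValue(value="column"))]
        ),
    )
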
From 7644827e799f2d7a1a00580fa83794205249b47a Mon Sep 17 00:00:00 2001
From: "Hsekumsti@gmail.com" <Hsekumsti@gmail.com>
Date: Fri, 28 Nov 2025 07:12:30 +0530
Subject: [PATCH 2/2] Add auto-load datasets from directory feature to
 SemanticModel

---
 src/intugle/semantic_model.py | 59 ++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/src/intugle/semantic_model.py b/src/intugle/semantic_model.py
index 31d1af0..ade9106 100644
--- a/src/intugle/semantic_model.py
+++ b/src/intugle/semantic_model.py
@@ -1,6 +1,7 @@
 import logging
+import pathlib
 
-from typing import TYPE_CHECKING, Any, Dict, List
+from typing import TYPE_CHECKING, Any, Dict, List, Union
 
 import pandas as pd
 import yaml
@@ -20,7 +21,7 @@
 
 
 class SemanticModel:
-    def __init__(self, data_input: Dict[str, Any] | List[DataSet], domain: str = ""):
+    def __init__(self, data_input: Union[Dict[str, Any], List[DataSet], str], domain: str = ""):
         self.datasets: Dict[str, DataSet] = {}
         self.links: list[PredictedLink] = []
         self.domain = domain
@@ -30,9 +31,11 @@ def __init__(self, data_input: Dict[str, Any] | List[DataSet], domain: str = "")
             self._initialize_from_dict(data_input)
         elif isinstance(data_input, list):
             self._initialize_from_list(data_input)
+        elif isinstance(data_input, str):
+            self._initialize_from_folder(data_input)
         else:
             raise TypeError(
-                "Input must be a dictionary of named dataframes or a list of DataSet objects."
+                "Input must be a dictionary of named dataframes, a list of DataSet objects, or a string path to a folder."
             )
 
     def _initialize_from_dict(self, data_dict: Dict[str, Any]):
@@ -50,6 +53,55 @@ def _initialize_from_list(self, data_list: List[DataSet]):
             )
             self.datasets[dataset.name] = dataset
 
+    def _initialize_from_folder(self, folder_path: str):
+        """Scans a folder for supported data files (CSV, Parquet, Excel) and loads them as datasets."""
+        folder = pathlib.Path(folder_path)
+
+        if not folder.exists():
+            raise FileNotFoundError(f"Folder path does not exist: {folder_path}")
+
+        if not folder.is_dir():
+            raise NotADirectoryError(f"Path is not a directory: {folder_path}")
+
+        # Extension to DuckDB type mapping
+        extension_mapping = {
+            '.csv': 'csv',
+            '.parquet': 'parquet',
+            '.xlsx': 'xlsx',
+            '.xls': 'xlsx'
+        }
+
+        found_files = False
+        for file_path in folder.iterdir():
+            if file_path.is_file():
+                file_extension = file_path.suffix.lower()
+                if file_extension in extension_mapping:
+                    found_files = True
+
+                    # Use filename without extension as dataset name
+                    dataset_name = file_path.stem
+
+                    # Create DuckDB config for this file
+                    config = {
+                        "path": str(file_path.resolve()),
+                        "type": extension_mapping[file_extension]
+                    }
+
+                    # Create DataSet with DuckDB adapter
+                    dataset = DataSet(config, name=dataset_name)
+                    self.datasets[dataset_name] = dataset
+
+                    console.print(
+                        f"Loaded dataset '{dataset_name}' from {file_path.name}",
+                        style="green"
+                    )
+
+        if not found_files:
+            raise FileNotFoundError(
+                f"No supported files found in {folder_path}. Supported formats: "
+                f"{', '.join(extension_mapping.keys())}"
+            )
+
     def profile(self, force_recreate: bool = False):
         """Run profiling, datatype identification, and key identification for all datasets."""
         console.print(
@@ -262,4 +314,3 @@ def deploy(self, target: str, **kwargs):
                 f"Failed to deploy semantic model to '{target}': {e}", style="bold red"
             )
             raise
-
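
Usage sketch for PATCH 2/2 (the "./data" directory, its file names, and the
domain value are illustrative, not taken from the patch): pointing
SemanticModel at a folder loads every supported file as a dataset named
after the file's stem.

    from intugle.semantic_model import SemanticModel

    # Assuming ./data contains orders.csv and customers.parquet, this
    # creates datasets named "orders" and "customers" via the DuckDB adapter.
    model = SemanticModel("./data", domain="retail")
    model.profile()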