diff --git a/.crd-ref-docs.yaml b/.crd-ref-docs.yaml
new file mode 100644
index 000000000..ce81af8b6
--- /dev/null
+++ b/.crd-ref-docs.yaml
@@ -0,0 +1,68 @@
+processor:
+ # Ignore fields that are common to all Kubernetes resources
+ ignoreFields:
+ - "TypeMeta"
+ - "ObjectMeta"
+ - "ListMeta"
+
+ # Ignore types that are not relevant for documentation
+ ignoreTypes:
+ - "metav1.Time"
+ - "metav1.Duration"
+ - "metav1.ObjectMeta"
+ - "metav1.TypeMeta"
+ - "metav1.ListMeta"
+
+ # Source path for Go API types
+ sourcePath: "./src/semantic-router/pkg/apis/vllm.ai/v1alpha1"
+
+render:
+ # Enable Kubebuilder markers rendering
+ kubebuilderMarkers: true
+
+ # Group resources by Kind
+ groupByKind: true
+
+ # Include table of contents
+ includeTableOfContents: true
+
+ # Custom templates (optional)
+ # templatesDir: "./docs/crd-templates"
+
+ # Output format: markdown, asciidoc, or html
+ format: markdown
+
+# Custom configuration for your CRDs
+groups:
+ - name: vllm.ai
+ displayName: "vLLM Semantic Router"
+ description: |
+ Custom Resource Definitions for vLLM Semantic Router.
+ These CRDs enable declarative configuration of intelligent routing and model pools.
+
+ kinds:
+ - name: IntelligentRoute
+ displayName: "Intelligent Route"
+ description: |
+ IntelligentRoute defines intelligent routing rules and decisions for LLM requests.
+ It supports decision-based routing with rule combinations, model references, and plugins.
+
+ - name: IntelligentPool
+ displayName: "Intelligent Pool"
+ description: |
+ IntelligentPool defines a pool of LLM models with their configurations.
+ It manages model endpoints, reasoning families, and model-specific settings.
+
+# Markdown rendering options
+markdown:
+ # Header level for the main title
+ headerLevel: 1
+
+ # Include examples in the documentation
+ includeExamples: true
+
+ # Include status subresource documentation
+ includeStatus: true
+
+ # Code block language for YAML examples
+ codeBlockLanguage: yaml
diff --git a/.github/workflows/integration-test-docker.yml b/.github/workflows/integration-test-docker.yml
index 972c17106..3bff94a3f 100644
--- a/.github/workflows/integration-test-docker.yml
+++ b/.github/workflows/integration-test-docker.yml
@@ -1,9 +1,6 @@
name: Integration Test [Docker Compose]
on:
- pull_request:
- branches:
- - main
workflow_dispatch: # Allow manual triggering
jobs:
diff --git a/.github/workflows/integration-test-dynamic-config.yml b/.github/workflows/integration-test-dynamic-config.yml
new file mode 100644
index 000000000..b57ef76cd
--- /dev/null
+++ b/.github/workflows/integration-test-dynamic-config.yml
@@ -0,0 +1,166 @@
+name: Integration Test [Dynamic Config]
+
+on:
+ pull_request:
+ branches:
+ - main
+ push:
+ branches:
+ - main
+ workflow_dispatch: # Allow manual triggering
+
+jobs:
+ integration-test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 60
+
+ steps:
+ - name: Check out the repo
+ uses: actions/checkout@v4
+
+ - name: Set up Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: '1.24'
+
+ - name: Set up Rust
+ uses: actions-rust-lang/setup-rust-toolchain@v1
+ with:
+ toolchain: "1.90"
+
+ - name: Install system dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ make \
+ curl \
+ build-essential \
+ pkg-config
+
+ - name: Install Kind
+ run: |
+ curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-amd64
+ chmod +x ./kind
+ sudo mv ./kind /usr/local/bin/kind
+
+ - name: Install kubectl
+ run: |
+ curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+ chmod +x kubectl
+ sudo mv kubectl /usr/local/bin/kubectl
+
+ - name: Install Helm
+ run: |
+ curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+
+ - name: Download E2E test dependencies
+ run: |
+ cd e2e && go mod download
+
+ - name: Build E2E test binary
+ run: |
+ make build-e2e
+
+ - name: Run Dynamic Config E2E tests
+ id: e2e-test
+ run: |
+ set +e # Don't exit on error, we want to capture the result
+ make e2e-test E2E_PROFILE=dynamic-config E2E_VERBOSE=true E2E_KEEP_CLUSTER=false
+ TEST_EXIT_CODE=$?
+ echo "test_exit_code=${TEST_EXIT_CODE}" >> $GITHUB_OUTPUT
+ exit ${TEST_EXIT_CODE}
+
+ - name: Upload test reports
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: test-reports-dynamic-config
+ path: |
+ test-report.json
+ test-report.md
+ semantic-router-logs.txt
+ retention-days: 30
+
+ - name: Create test summary from report
+ if: always()
+ run: |
+ if [ -f "test-report.md" ]; then
+ echo "=== Reading test report from test-report.md ==="
+ cat test-report.md >> $GITHUB_STEP_SUMMARY
+
+ # Add semantic-router logs section if available
+ if [ -f "semantic-router-logs.txt" ]; then
+ cat >> $GITHUB_STEP_SUMMARY << 'EOF'
+
+ ---
+
+ ### 📋 Semantic Router Logs
+
+ <details>
+ <summary>Click to view semantic-router logs</summary>
+
+ ```
+ EOF
+ # Add first 500 lines of logs to summary (to avoid exceeding GitHub limits)
+ head -n 500 semantic-router-logs.txt >> $GITHUB_STEP_SUMMARY
+
+ # Check if there are more lines
+ TOTAL_LINES=$(wc -l < semantic-router-logs.txt)
+ if [ "$TOTAL_LINES" -gt 500 ]; then
+ cat >> $GITHUB_STEP_SUMMARY << EOF
+
+ ... (showing first 500 lines of $TOTAL_LINES total lines)
+
+ 📦 Full logs are available in the workflow artifacts: semantic-router-logs.txt
+ EOF
+ fi
+
+ cat >> $GITHUB_STEP_SUMMARY << 'EOF'
+ ```
+
+ </details>
+ EOF
+ fi
+
+ # Add additional context
+ cat >> $GITHUB_STEP_SUMMARY << 'EOF'
+
+ ---
+
+ ### 📚 Additional Resources
+
+ - **Trigger:** ${{ github.event_name }}
+ - **Branch:** `${{ github.ref_name }}`
+ - **Commit:** `${{ github.sha }}`
+ - **Workflow Run:** [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ - [E2E Test Framework Documentation](https://github.com/${{ github.repository }}/tree/main/e2e)
+ - [Dynamic Config Profile](https://github.com/${{ github.repository }}/tree/main/e2e/profiles/dynamic-config)
+
+ ### 📦 Artifacts
+
+ - **test-report.json** - Detailed test results in JSON format
+ - **test-report.md** - Human-readable test report
+ - **semantic-router-logs.txt** - Complete semantic-router pod logs
+ - All artifacts are retained for 30 days
+
+ ### 🔧 Dynamic Config Profile
+
+ This test validates the Kubernetes CRD-based dynamic configuration feature:
+ - IntelligentPool CRD for model configuration
+ - IntelligentRoute CRD for routing decisions
+ - Controller-runtime based reconciliation
+ - Automatic configuration updates on CRD changes
+ EOF
+ else
+ echo "β οΈ Test report file not found!" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "The E2E test framework did not generate a report file." >> $GITHUB_STEP_SUMMARY
+ echo "This might indicate that the test failed before report generation." >> $GITHUB_STEP_SUMMARY
+ fi
+
+ - name: Clean up
+ if: always()
+ run: |
+ make e2e-cleanup || true
+
+
diff --git a/config/config.yaml b/config/config.yaml
index 085f0cdf9..3454e0d13 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -54,8 +54,6 @@ model_config:
"qwen3":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"] # Optional: omit to let upstream handle endpoint selection
- pii_policy:
- allow_by_default: true
# Classifier configuration
classifier:
@@ -72,100 +70,346 @@ classifier:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-# Categories with new use_reasoning field structure
+# Categories define domain metadata only (no routing logic)
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
- - model: qwen3
- score: 0.4
- use_reasoning: false
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: false
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: false
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer_science"]
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
- - model: qwen3
- score: 0.7
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+# Decisions define routing logic with domain-based conditions
+strategy: "priority"
+
+decisions:
+ - name: "business_decision"
+ description: "Business and management queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "law_decision"
+ description: "Legal questions and law-related topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "psychology_decision"
+ description: "Psychology and mental health topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "biology_decision"
+ description: "Biology and life sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "chemistry_decision"
+ description: "Chemistry and chemical sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "history_decision"
+ description: "Historical questions and cultural topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "health_decision"
+ description: "Health and medical information queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "economics_decision"
+ description: "Economics and financial topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "physics_decision"
+ description: "Physics and physical sciences"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "computer_science_decision"
+ description: "Computer science and programming"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "philosophy_decision"
+ description: "Philosophy and ethical questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "engineering_decision"
+ description: "Engineering and technical problem-solving"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Router Configuration for Dual-Path Selection
router:
diff --git a/config/intelligent-routing/in-tree/bert_classification.yaml b/config/intelligent-routing/in-tree/bert_classification.yaml
index 2e1c1749c..e74706b78 100644
--- a/config/intelligent-routing/in-tree/bert_classification.yaml
+++ b/config/intelligent-routing/in-tree/bert_classification.yaml
@@ -54,8 +54,6 @@ model_config:
"qwen3":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
# Classifier configuration
classifier:
@@ -72,100 +70,346 @@ classifier:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-# Categories with new use_reasoning field structure
+# Categories define domain metadata only (no routing logic)
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
- - model: qwen3
- score: 0.4
- use_reasoning: false
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: false
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: false
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer_science"]
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
- - model: qwen3
- score: 0.7
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+strategy: "priority"
+
+decisions:
+ - name: "business_decision"
+ description: "Business and management related queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "law_decision"
+ description: "Legal questions and law-related topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "psychology_decision"
+ description: "Psychology and mental health topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "biology_decision"
+ description: "Biology and life sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "chemistry_decision"
+ description: "Chemistry and chemical sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "history_decision"
+ description: "Historical questions and cultural topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "health_decision"
+ description: "Health and medical information queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "economics_decision"
+ description: "Economics and financial topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "physics_decision"
+ description: "Physics and physical sciences"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "computer_science_decision"
+ description: "Computer science and programming"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "philosophy_decision"
+ description: "Philosophy and ethical questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "engineering_decision"
+ description: "Engineering and technical problem-solving"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
# Router Configuration for Dual-Path Selection
router:
diff --git a/config/intelligent-routing/in-tree/embedding.yaml b/config/intelligent-routing/in-tree/embedding.yaml
index f507f6997..8996d3a0a 100644
--- a/config/intelligent-routing/in-tree/embedding.yaml
+++ b/config/intelligent-routing/in-tree/embedding.yaml
@@ -54,8 +54,6 @@ model_config:
"qwen3":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
# Classifier configuration
classifier:
@@ -126,44 +124,115 @@ embedding_rules:
quality_priority: 0.5
latency_priority: 0.5
-# Categories with model scores
+# Categories define domain metadata only (no routing logic)
categories:
- # Embedding-based categories
- name: technical_support
- system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps."
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: true
- jailbreak_enabled: true
- pii_detection_enabled: true
-
+ description: "Technical support and troubleshooting queries"
+ mmlu_categories: ["technical_support"]
- name: product_inquiry
- system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
- model_scores:
- - model: qwen3
- score: 0.85
- use_reasoning: false
- jailbreak_enabled: true
- pii_detection_enabled: false
-
+ description: "Product information and specifications"
+ mmlu_categories: ["product_inquiry"]
- name: account_management
- system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy."
- model_scores:
- - model: qwen3
- score: 0.88
- use_reasoning: false
- jailbreak_enabled: true
- pii_detection_enabled: true
-
+ description: "Account and subscription management"
+ mmlu_categories: ["account_management"]
- name: general_inquiry
- system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions."
- model_scores:
- - model: qwen3
- score: 0.75
+ description: "General questions and information requests"
+ mmlu_categories: ["general_inquiry"]
+
+strategy: "priority"
+
+decisions:
+ - name: "technical_support_decision"
+ description: "Technical support and troubleshooting queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "technical_support"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "product_inquiry_decision"
+ description: "Product information and specifications"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "product_inquiry"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: false
+ pii_types_allowed: []
+
+ - name: "account_management_decision"
+ description: "Account and subscription management"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "account_management"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_inquiry_decision"
+ description: "General questions and information requests"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "general_inquiry"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
- jailbreak_enabled: true
- pii_detection_enabled: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: false
+ pii_types_allowed: []
# Embedding Models Configuration
# These models provide intelligent embedding generation with automatic routing:
diff --git a/config/intelligent-routing/in-tree/generic_categories.yaml b/config/intelligent-routing/in-tree/generic_categories.yaml
index 04468a4a8..3307d5037 100644
--- a/config/intelligent-routing/in-tree/generic_categories.yaml
+++ b/config/intelligent-routing/in-tree/generic_categories.yaml
@@ -17,24 +17,47 @@ classifier:
# Define your generic categories and map them to MMLU-Pro categories.
# The classifier will translate predicted MMLU categories into these generic names.
+# Categories now only contain metadata - routing logic is defined in decisions below.
categories:
- name: tech
mmlu_categories: ["computer science", "engineering"]
- model_scores:
- - model: phi4
- score: 0.9
- - model: mistral-small3.1
- score: 0.7
- name: finance
mmlu_categories: ["economics"]
- model_scores:
- - model: gemma3:27b
- score: 0.8
- name: politics
# If omitted, identity mapping applies when this name matches MMLU
- model_scores:
+
+# Decisions define routing logic by combining rules and model selection
+decisions:
+ - name: tech
+ description: "Route technology-related queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "tech"
+ modelRefs:
+ - model: phi4
+ - name: finance
+ description: "Route finance and economics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "finance"
+ modelRefs:
+ - model: gemma3:27b
+ - name: politics
+ description: "Route politics-related queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "politics"
+ modelRefs:
- model: gemma3:27b
- score: 0.6
# A default model is recommended for fallback
default_model: mistral-small3.1
diff --git a/config/intelligent-routing/in-tree/keyword.yaml b/config/intelligent-routing/in-tree/keyword.yaml
index ec418a241..167aa5136 100644
--- a/config/intelligent-routing/in-tree/keyword.yaml
+++ b/config/intelligent-routing/in-tree/keyword.yaml
@@ -54,8 +54,6 @@ model_config:
"qwen3":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
# Classifier configuration
classifier:
@@ -90,128 +88,444 @@ keyword_rules:
keywords: ["user\\.name@domain\\.com", "C:\\Program Files\\\\"] # Keywords are treated as regex
case_sensitive: false
-# Categories with new use_reasoning field structure
+# Categories define domain metadata only (no routing logic)
categories:
- # Keyword-based categories
- name: urgent_request
- system_prompt: "You are a highly responsive assistant specialized in handling urgent requests. Prioritize speed and efficiency while maintaining accuracy. Provide concise, actionable responses and focus on immediate solutions."
- model_scores:
- - model: qwen3
- score: 0.8
- use_reasoning: false # Urgent requests need fast responses
+ description: "Urgent and time-sensitive requests"
+ mmlu_categories: ["urgent_request"]
- name: sensitive_data
- system_prompt: "You are a security-conscious assistant specialized in handling sensitive data. Exercise extreme caution with personal information, follow data protection best practices, and remind users about privacy considerations."
- jailbreak_enabled: true # Enable extra security for sensitive data
- jailbreak_threshold: 0.6 # Lower threshold for more strict detection
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: false
+ description: "Requests involving sensitive personal data"
+ mmlu_categories: ["sensitive_data"]
- name: exclude_spam
- system_prompt: "You are a content moderation assistant. This request has been flagged as potential spam. Please verify the legitimacy of the request before proceeding."
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Potential spam or suspicious requests"
+ mmlu_categories: ["exclude_spam"]
- name: regex_pattern_match
- system_prompt: "You are a technical assistant specialized in handling structured data and pattern-based requests. Provide precise, format-aware responses."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
- # Standard categories
+ description: "Structured data and pattern-based requests"
+ mmlu_categories: ["regex_pattern_match"]
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
- - model: qwen3
- score: 0.4
- use_reasoning: false
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: false
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: false
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer science"]
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
- - model: qwen3
- score: 0.7
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+strategy: "priority"
+
+decisions:
+ - name: "urgent_request_decision"
+ description: "Urgent and time-sensitive requests"
+ priority: 150
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "urgent_request"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a highly responsive assistant specialized in handling urgent requests. Prioritize speed and efficiency while maintaining accuracy. Provide concise, actionable responses and focus on immediate solutions."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "sensitive_data_decision"
+ description: "Requests involving sensitive personal data"
+ priority: 150
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "sensitive_data"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a security-conscious assistant specialized in handling sensitive data. Exercise extreme caution with personal information, follow data protection best practices, and remind users about privacy considerations."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.6
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "exclude_spam_decision"
+ description: "Potential spam or suspicious requests"
+ priority: 150
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "exclude_spam"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a content moderation assistant. This request has been flagged as potential spam. Please verify the legitimacy of the request before proceeding."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "regex_pattern_match_decision"
+ description: "Structured data and pattern-based requests"
+ priority: 150
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "regex_pattern_match"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a technical assistant specialized in handling structured data and pattern-based requests. Provide precise, format-aware responses."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ # Standard category decisions (similar to config.yaml)
+ - name: "business_decision"
+ description: "Business and management related queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ # Standard decisions for the remaining categories (law through engineering)
+ - name: "law_decision"
+ description: "Legal questions and law-related topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "psychology_decision"
+ description: "Psychology and mental health topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "biology_decision"
+ description: "Biology and life sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "chemistry_decision"
+ description: "Chemistry and chemical sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "history_decision"
+ description: "Historical questions and cultural topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "health_decision"
+ description: "Health and medical information queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "economics_decision"
+ description: "Economics and financial topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "physics_decision"
+ description: "Physics and physical sciences"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "computer_science_decision"
+ description: "Computer science and programming"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "philosophy_decision"
+ description: "Philosophy and ethical questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "engineering_decision"
+ description: "Engineering and technical problem-solving"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "qwen3"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
# Router Configuration for Dual-Path Selection
router:
diff --git a/config/intelligent-routing/in-tree/lora_routing.yaml b/config/intelligent-routing/in-tree/lora_routing.yaml
index e3510e25b..8e490e3a1 100644
--- a/config/intelligent-routing/in-tree/lora_routing.yaml
+++ b/config/intelligent-routing/in-tree/lora_routing.yaml
@@ -36,8 +36,6 @@ model_config:
"llama2-7b":
reasoning_family: "llama2"
preferred_endpoints: ["vllm-primary"]
- pii_policy:
- allow_by_default: true
# Define available LoRA adapters for this model
# These names must match the LoRA modules registered with vLLM at startup
loras:
@@ -61,41 +59,102 @@ classifier:
categories:
- name: technical
description: "Programming, software engineering, and technical questions"
- system_prompt: "You are an expert software engineer with deep knowledge of programming languages, algorithms, system design, and best practices. Provide clear, accurate technical guidance with code examples when appropriate."
- model_scores:
- - model: llama2-7b # Base model name (for endpoint selection and PII policy)
- lora_name: technical-lora # LoRA adapter name (used as final model name in request)
- score: 1.0
+ mmlu_categories: ["technical"]
+ - name: medical
+ description: "Medical and healthcare questions"
+ mmlu_categories: ["medical"]
+ - name: legal
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["legal"]
+ - name: general
+ description: "General questions that don't fit specific domains"
+ mmlu_categories: ["general"]
+
+strategy: "priority"
+
+decisions:
+ - name: "technical_decision"
+ description: "Programming, software engineering, and technical questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "technical"
+ modelRefs:
+ - model: "llama2-7b"
+ lora_name: "technical-lora"
use_reasoning: true
- reasoning_effort: medium
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are an expert software engineer with deep knowledge of programming languages, algorithms, system design, and best practices. Provide clear, accurate technical guidance with code examples when appropriate."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
- - name: medical
+ - name: "medical_decision"
description: "Medical and healthcare questions"
- system_prompt: "You are a medical expert with comprehensive knowledge of anatomy, physiology, diseases, treatments, and healthcare practices. Provide accurate medical information while emphasizing that responses are for educational purposes only and not a substitute for professional medical advice."
- model_scores:
- - model: llama2-7b
- lora_name: medical-lora # Different LoRA adapter for medical domain
- score: 1.0
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "medical"
+ modelRefs:
+ - model: "llama2-7b"
+ lora_name: "medical-lora"
use_reasoning: true
- reasoning_effort: high
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a medical expert with comprehensive knowledge of anatomy, physiology, diseases, treatments, and healthcare practices. Provide accurate medical information while emphasizing that responses are for educational purposes only and not a substitute for professional medical advice."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
- - name: legal
+ - name: "legal_decision"
description: "Legal questions and law-related topics"
- system_prompt: "You are a legal expert with knowledge of legal principles, case law, and statutory interpretation. Provide accurate legal information while clearly stating that responses are for informational purposes only and do not constitute legal advice."
- model_scores:
- - model: llama2-7b
- lora_name: legal-lora # Different LoRA adapter for legal domain
- score: 1.0
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "legal"
+ modelRefs:
+ - model: "llama2-7b"
+ lora_name: "legal-lora"
use_reasoning: true
- reasoning_effort: high
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a legal expert with knowledge of legal principles, case law, and statutory interpretation. Provide accurate legal information while clearly stating that responses are for informational purposes only and do not constitute legal advice."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
- - name: general
+ - name: "general_decision"
description: "General questions that don't fit specific domains"
- system_prompt: "You are a helpful AI assistant with broad knowledge across many topics. Provide clear, accurate, and helpful responses."
- model_scores:
- - model: llama2-7b # No lora_name specified - uses base model
- score: 0.8
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "general"
+ modelRefs:
+ - model: "llama2-7b"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful AI assistant with broad knowledge across many topics. Provide clear, accurate, and helpful responses."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Default model for fallback
default_model: llama2-7b
diff --git a/config/intelligent-routing/out-tree/config-mcp-classifier.yaml b/config/intelligent-routing/out-tree/config-mcp-classifier.yaml
index e66c8a662..ee5f27c30 100644
--- a/config/intelligent-routing/out-tree/config-mcp-classifier.yaml
+++ b/config/intelligent-routing/out-tree/config-mcp-classifier.yaml
@@ -110,8 +110,6 @@ model_config:
reasoning_family: gpt-oss
preferred_endpoints:
- endpoint1
- pii_policy:
- allow_by_default: true
# Reasoning family configurations
reasoning_families:
diff --git a/config/observability/config.tracing.yaml b/config/observability/config.tracing.yaml
index 6ebd651bf..960a86fa9 100644
--- a/config/observability/config.tracing.yaml
+++ b/config/observability/config.tracing.yaml
@@ -39,8 +39,6 @@ model_config:
"openai/gpt-oss-20b":
reasoning_family: "gpt-oss"
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
classifier:
category_model:
@@ -58,17 +56,54 @@ classifier:
categories:
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
- model_scores:
- - model: openai/gpt-oss-20b
- score: 1.0
- use_reasoning: true
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
- name: other
- system_prompt: "You are a helpful assistant."
- model_scores:
- - model: openai/gpt-oss-20b
- score: 0.7
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
+
+strategy: "priority"
+
+decisions:
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "openai/gpt-oss-20b"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "openai/gpt-oss-20b"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful assistant."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
default_model: openai/gpt-oss-20b
diff --git a/config/prompt-guard/jailbreak_domain.yaml b/config/prompt-guard/jailbreak_domain.yaml
index 52b84087c..9a377fc66 100644
--- a/config/prompt-guard/jailbreak_domain.yaml
+++ b/config/prompt-guard/jailbreak_domain.yaml
@@ -20,69 +20,153 @@ classifier:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
-# Categories with different jailbreak detection settings
+# Categories define domain metadata only (no routing logic)
categories:
- # High-security category: Strict jailbreak detection with high threshold
- name: business
description: "Business queries, strategy, and professional advice"
- jailbreak_enabled: true # Explicitly enable (inherits from global by default)
- jailbreak_threshold: 0.9 # Higher threshold for stricter detection
- system_prompt: "You are a professional business consultant. Provide practical, actionable business advice."
- model_scores:
- - model: qwen3
- score: 0.7
+ mmlu_categories: ["business"]
+ - name: customer_support
+ description: "Customer support and general inquiries"
+ mmlu_categories: ["customer_support"]
+ - name: code_generation
+ description: "Internal code generation and development tools"
+ mmlu_categories: ["code_generation"]
+ - name: testing
+ description: "Testing and quality assurance queries"
+ mmlu_categories: ["testing"]
+ - name: general
+ description: "General queries that don't fit into specific categories"
+ mmlu_categories: ["general"]
+
+# Decisions define routing logic with domain-based conditions
+strategy: "priority"
+
+decisions:
+ # High-security category: Strict jailbreak detection with high threshold
+ - name: "business_decision"
+ description: "Business queries, strategy, and professional advice"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a professional business consultant. Provide practical, actionable business advice."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.9
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Public-facing category: Enable with standard threshold
- - name: customer_support
+ - name: "customer_support_decision"
description: "Customer support and general inquiries"
- jailbreak_enabled: true # Explicitly enable for customer-facing content
- jailbreak_threshold: 0.8 # Slightly higher than global for public-facing
- system_prompt: "You are a friendly customer support agent. Help users with their questions."
- model_scores:
- - model: qwen3
- score: 0.8
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "customer_support"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a friendly customer support agent. Help users with their questions."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.8
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Internal tool category: Relaxed threshold (trusted environment)
- - name: code_generation
+ - name: "code_generation_decision"
description: "Internal code generation and development tools"
- jailbreak_enabled: true # Keep enabled but with relaxed threshold
- jailbreak_threshold: 0.5 # Lower threshold to reduce false positives for code
- system_prompt: "You are a code generation assistant for internal developers."
- model_scores:
- - model: qwen3
- score: 0.9
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "code_generation"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a code generation assistant for internal developers."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.5
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Testing category: Disable jailbreak detection
- - name: testing
+ - name: "testing_decision"
description: "Testing and quality assurance queries"
- jailbreak_enabled: false # Disable for testing purposes
- system_prompt: "You are a QA assistant helping with test scenarios."
- model_scores:
- - model: qwen3
- score: 0.6
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "testing"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a QA assistant helping with test scenarios."
+ - type: "jailbreak"
+ configuration:
+ enabled: false
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Default category: Uses global setting (inherits prompt_guard.enabled and threshold)
- - name: general
+ - name: "general_decision"
description: "General queries that don't fit into specific categories"
- # jailbreak_enabled not specified - will inherit from global prompt_guard.enabled
- # jailbreak_threshold not specified - will inherit from global prompt_guard.threshold (0.7)
- system_prompt: "You are a helpful assistant."
- model_scores:
- - model: qwen3
- score: 0.5
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "general"
+ modelRefs:
+ - model: "qwen3"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful assistant."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Model configuration
model_config:
"qwen3":
reasoning_family: "qwen3"
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
# Reasoning family configurations
reasoning_families:
diff --git a/config/prompt-guard/pii_domain.yaml b/config/prompt-guard/pii_domain.yaml
index 39e34539f..46a1de9fa 100644
--- a/config/prompt-guard/pii_domain.yaml
+++ b/config/prompt-guard/pii_domain.yaml
@@ -17,98 +17,193 @@ classifier:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
-# Categories with different PII detection settings
+# Categories define domain metadata only (no routing logic)
categories:
- # High-security category: Strict PII detection with high threshold
- name: healthcare
description: "Healthcare and medical queries"
- pii_enabled: true # Explicitly enable (inherits from global by default)
- pii_threshold: 0.9 # Higher threshold for stricter detection (fewer false positives)
- system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care."
- model_scores:
- - model: secure-llm
- score: 0.9
+ mmlu_categories: ["healthcare"]
+ - name: finance
+ description: "Financial and banking queries"
+ mmlu_categories: ["finance"]
+ - name: customer_support
+ description: "Customer support and general inquiries"
+ mmlu_categories: ["customer_support"]
+ - name: code_generation
+ description: "Internal code generation and development tools"
+ mmlu_categories: ["code_generation"]
+ - name: documentation
+ description: "Public documentation and help articles"
+ mmlu_categories: ["documentation"]
+ - name: testing
+ description: "Testing and quality assurance queries"
+ mmlu_categories: ["testing"]
+ - name: general
+ description: "General queries that don't fit into specific categories"
+ mmlu_categories: ["general"]
+
+# Decisions define routing logic with domain-based conditions
+strategy: "priority"
+
+decisions:
+ # High-security category: Strict PII detection with high threshold
+ - name: "healthcare_decision"
+ description: "Healthcare and medical queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "healthcare"
+ modelRefs:
+ - model: "secure-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care."
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.9
+ pii_types_allowed: ["GPE", "ORGANIZATION"]
# Financial category: Very strict PII detection
- - name: finance
+ - name: "finance_decision"
description: "Financial and banking queries"
- pii_enabled: true
- pii_threshold: 0.95 # Very high threshold for critical PII like SSN, credit cards
- system_prompt: "You are a financial advisor. Never store or log any PII information."
- model_scores:
- - model: secure-llm
- score: 0.9
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "finance"
+ modelRefs:
+ - model: "secure-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a financial advisor. Never store or log any PII information."
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.95
+ pii_types_allowed: ["GPE", "ORGANIZATION"]
# Customer support: Balanced threshold
- - name: customer_support
+ - name: "customer_support_decision"
description: "Customer support and general inquiries"
- pii_enabled: true
- pii_threshold: 0.8 # Slightly higher than global for customer-facing content
- system_prompt: "You are a friendly customer support agent. Be cautious with customer information."
- model_scores:
- - model: general-llm
- score: 0.8
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "customer_support"
+ modelRefs:
+ - model: "general-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a friendly customer support agent. Be cautious with customer information."
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.8
+ pii_types_allowed: []
# Internal tools: Relaxed threshold (trusted environment)
- - name: code_generation
+ - name: "code_generation_decision"
description: "Internal code generation and development tools"
- pii_enabled: true
- pii_threshold: 0.5 # Lower threshold to reduce false positives for code/technical content
- system_prompt: "You are a code generation assistant for internal developers."
- model_scores:
- - model: general-llm
- score: 0.9
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "code_generation"
+ modelRefs:
+ - model: "general-llm"
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a code generation assistant for internal developers."
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.5
+ pii_types_allowed: []
# Public documentation: Lower threshold for broader detection
- - name: documentation
+ - name: "documentation_decision"
description: "Public documentation and help articles"
- pii_enabled: true
- pii_threshold: 0.6 # Lower threshold to catch more potential PII in public content
- system_prompt: "You are a documentation assistant. Help create clear public documentation."
- model_scores:
- - model: general-llm
- score: 0.7
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "documentation"
+ modelRefs:
+ - model: "general-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a documentation assistant. Help create clear public documentation."
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.6
+ pii_types_allowed: []
# Testing category: Disable PII detection
- - name: testing
+ - name: "testing_decision"
description: "Testing and quality assurance queries"
- pii_enabled: false # Disable PII detection for testing purposes
- system_prompt: "You are a QA assistant helping with test scenarios."
- model_scores:
- - model: general-llm
- score: 0.6
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "testing"
+ modelRefs:
+ - model: "general-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a QA assistant helping with test scenarios."
+ - type: "pii"
+ configuration:
+ enabled: false
+ pii_types_allowed: []
# Default category: Uses global setting
- - name: general
+ - name: "general_decision"
description: "General queries that don't fit into specific categories"
- # pii_enabled not specified - will inherit from global (enabled if pii_model is configured)
- # pii_threshold not specified - will inherit from global threshold (0.7)
- system_prompt: "You are a helpful assistant."
- model_scores:
- - model: general-llm
- score: 0.5
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "general"
+ modelRefs:
+ - model: "general-llm"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful assistant."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
# Model configuration
model_config:
"secure-llm":
preferred_endpoints: ["secure-endpoint"]
- pii_policy:
- allow_by_default: false # Deny all PII by default for secure model
- pii_types_allowed:
- - "GPE" # Geopolitical entities (cities, countries) are OK
- - "ORGANIZATION" # Organization names are OK
"general-llm":
preferred_endpoints: ["general-endpoint"]
- pii_policy:
- allow_by_default: true # Allow all PII for general model
# Default model for fallback
default_model: general-llm
diff --git a/config/semantic-cache/config.hybrid.yaml b/config/semantic-cache/config.hybrid.yaml
index 7fa0a3537..c15a8bf82 100644
--- a/config/semantic-cache/config.hybrid.yaml
+++ b/config/semantic-cache/config.hybrid.yaml
@@ -45,8 +45,6 @@ model_config:
"qwen3":
reasoning_family: "qwen3"
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
# Classifier configuration
classifier:
diff --git a/config/testing/config.e2e.yaml b/config/testing/config.e2e.yaml
index 34cbbd323..2fb33d98b 100644
--- a/config/testing/config.e2e.yaml
+++ b/config/testing/config.e2e.yaml
@@ -51,20 +51,13 @@ vllm_endpoints:
health_check_path: "/health"
model_config:
-
"Model-A":
use_reasoning: false
reasoning_family: "qwen3" # This model uses Qwen reasoning syntax
preferred_endpoints: ["qwen-endpoint"]
- pii_policy:
- allow_by_default: false # Strict PII blocking model
- pii_types_allowed: ["EMAIL_ADDRESS"] # Only allow emails
"Model-B":
use_reasoning: false
preferred_endpoints: ["tinyllama-endpoint"]
- pii_policy:
- allow_by_default: true # Permissive PII model for safe routing
- pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
# Classifier configuration for text classification
# Using LoRA intent classifier (preferred modern approach with lora_config.json)
@@ -83,190 +76,288 @@ classifier:
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
categories:
- name: business
- model_scores:
- - model: "Model-A"
- score: 0.8
- use_reasoning: false
- reasoning_description: "Business content is typically conversational"
- reasoning_effort: low
- - model: "Model-B"
- score: 0.4
- use_reasoning: false
- - model: "Model-A"
- score: 0.2
- use_reasoning: false
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- model_scores:
- - model: "Model-B"
- score: 0.8
- use_reasoning: false
- reasoning_description: "Legal content is typically explanatory"
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.4
- use_reasoning: false
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- # Example: Strict cache threshold for psychology - clinical nuances matter
- # semantic_cache_enabled: true
- # semantic_cache_similarity_threshold: 0.92
- model_scores:
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- reasoning_description: "Psychology content is usually explanatory"
- - model: "Model-B"
- score: 0.4
- use_reasoning: false
- - model: "Model-A"
- score: 0.4
- use_reasoning: false
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- model_scores:
- - model: "Model-A"
- score: 0.8
- use_reasoning: false
- reasoning_description: "Biological processes benefit from structured analysis"
- - model: "Model-B"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.2
- use_reasoning: false
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- model_scores:
- - model: "Model-A"
- score: 0.8
- use_reasoning: true
- reasoning_description: "Chemical reactions and formulas require systematic thinking"
- reasoning_effort: high
- - model: "Model-B"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- model_scores:
- - model: "Model-A"
- score: 0.8
- use_reasoning: false
- reasoning_description: "Historical content is narrative-based"
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- - model: "Model-B"
- score: 0.4
- use_reasoning: false
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
- name: other
- # Example: Lower threshold for general queries - better cache hit rate
- # semantic_cache_enabled: true
- # semantic_cache_similarity_threshold: 0.75
- model_scores:
- - model: "Model-B"
- score: 0.8
- use_reasoning: false
- reasoning_description: "General content doesn't require reasoning"
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- # Example: Very strict cache threshold for health - word changes matter medically
- # semantic_cache_enabled: true
- # semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: "Model-B"
- score: 0.8
- use_reasoning: false
- reasoning_description: "Health information is typically informational"
- - model: "Model-A"
- score: 0.8
- use_reasoning: false
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
+ - name: economics
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
+ - name: math
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
+ - name: physics
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer_science"]
+ - name: philosophy
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
+ - name: engineering
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+strategy: "priority"
+
+decisions:
+ - name: "business_decision"
+ description: "Business and management related queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: "Model-A"
- score: 0.6
use_reasoning: false
- - name: economics
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "law_decision"
+ description: "Legal questions and law-related topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: "Model-B"
- score: 0.8
use_reasoning: false
- reasoning_description: "Economic discussions are usually explanatory"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "psychology_decision"
+ description: "Psychology and mental health topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: "Model-A"
- score: 0.8
use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "biology_decision"
+ description: "Biology and life sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: "Model-A"
- score: 0.1
use_reasoning: false
- - name: math
- model_scores:
- - model: "Model-B"
- score: 1.0
- use_reasoning: true
- reasoning_description: "Mathematical problems require step-by-step reasoning"
- reasoning_effort: high
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "chemistry_decision"
+ description: "Chemistry and chemical sciences questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: "Model-A"
- score: 0.9
use_reasoning: true
- reasoning_description: "Mathematical problems require step-by-step reasoning"
- reasoning_effort: high
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "history_decision"
+ description: "Historical questions and cultural topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: "Model-A"
- score: 0.8
use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "health_decision"
+ description: "Health and medical information queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: "Model-B"
- score: 0.6
use_reasoning: false
- - name: physics
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "economics_decision"
+ description: "Economics and financial topics"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: "Model-B"
- score: 0.4
- use_reasoning: true
- reasoning_description: "Physics concepts need logical analysis"
- - model: "Model-A"
- score: 0.4
- use_reasoning: false
- - model: "Model-A"
- score: 0.4
use_reasoning: false
- - name: computer science
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: "Model-B"
- score: 0.6
- use_reasoning: false
- reasoning_description: "Programming and algorithms need logical reasoning"
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.1
- use_reasoning: false
- - name: philosophy
- model_scores:
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- reasoning_description: "Philosophical discussions are conversational"
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "physics_decision"
+ description: "Physics and physical sciences"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "Model-B"
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "computer_science_decision"
+ description: "Computer science and programming"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
- model: "Model-B"
- score: 0.2
use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "philosophy_decision"
+ description: "Philosophy and ethical questions"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: "Model-A"
- score: 0.2
use_reasoning: false
- - name: engineering
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS"]
+
+ - name: "engineering_decision"
+ description: "Engineering and technical problem-solving"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: "Model-B"
- score: 0.6
use_reasoning: false
- reasoning_description: "Engineering problems require systematic problem-solving"
- - model: "Model-A"
- score: 0.6
- use_reasoning: false
- - model: "Model-A"
- score: 0.2
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "Model-B"
use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
default_model: "Model-A"
diff --git a/config/testing/config.testing.yaml b/config/testing/config.testing.yaml
index 8e4b631f9..e7919dc93 100644
--- a/config/testing/config.testing.yaml
+++ b/config/testing/config.testing.yaml
@@ -37,18 +37,31 @@ model_config:
"openai/gpt-oss-20b":
reasoning_family: "gpt-oss"
preferred_endpoints: ["mock"]
- pii_policy:
- allow_by_default: true
categories:
- name: other
- # Category-level cache settings (optional - falls back to global if not set)
- # semantic_cache_enabled: true
- # semantic_cache_similarity_threshold: 0.8
- model_scores:
- - model: openai/gpt-oss-20b
- score: 0.7
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
+
+strategy: "priority"
+
+decisions:
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "openai/gpt-oss-20b"
use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
default_model: openai/gpt-oss-20b
diff --git a/deploy/helm/semantic-router/README.md b/deploy/helm/semantic-router/README.md
index 99d763e37..0c7dce4cd 100644
--- a/deploy/helm/semantic-router/README.md
+++ b/deploy/helm/semantic-router/README.md
@@ -6,6 +6,43 @@ A Helm chart for deploying Semantic Router - an intelligent routing system for L
**Homepage:**
+## CRD Management
+
+This Helm chart includes Custom Resource Definitions (CRDs) in the `crds/` directory:
+
+- `vllm.ai_intelligentpools.yaml` - IntelligentPool CRD
+- `vllm.ai_intelligentroutes.yaml` - IntelligentRoute CRD
+
+### Generating CRDs
+
+CRDs are automatically generated from Go type definitions using `controller-gen`. To regenerate CRDs:
+
+```bash
+# From the repository root
+make generate-crd
+```
+
+This command will:
+
+1. Generate CRDs from `src/semantic-router/pkg/apis/vllm.ai/v1alpha1` types
+2. Write the output to `deploy/kubernetes/crds/`
+3. Copy the CRDs to `deploy/helm/semantic-router/crds/` for the Helm chart
+
+### CRD Installation
+
+CRDs in the `crds/` directory are automatically installed by Helm:
+
+- Installed **before** other resources during `helm install`
+- **Not managed** by Helm (no Helm labels/annotations)
+- **Not updated** during `helm upgrade` (must be updated manually)
+- **Not deleted** during `helm uninstall` (protects custom resources)
+
+To manually update CRDs:
+
+```bash
+kubectl apply -f deploy/helm/semantic-router/crds/
+```
+
## Maintainers
| Name | Email | Url |
@@ -14,7 +51,7 @@ A Helm chart for deploying Semantic Router - an intelligent routing system for L
## Source Code
-*
+-
## Values
diff --git a/deploy/helm/semantic-router/crds/vllm.ai_intelligentpools.yaml b/deploy/helm/semantic-router/crds/vllm.ai_intelligentpools.yaml
new file mode 100644
index 000000000..7ffbd0a24
--- /dev/null
+++ b/deploy/helm/semantic-router/crds/vllm.ai_intelligentpools.yaml
@@ -0,0 +1,202 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentpools.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentPool
+ listKind: IntelligentPoolList
+ plural: intelligentpools
+ shortNames:
+ - ipool
+ singular: intelligentpool
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Default model name
+ jsonPath: .spec.defaultModel
+ name: Default Model
+ type: string
+ - description: Number of models
+ jsonPath: .status.modelCount
+ name: Models
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentPool defines a pool of models with their configurations
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentPoolSpec defines the desired state of IntelligentPool
+ properties:
+ defaultModel:
+ description: DefaultModel specifies the default model to use when
+ no specific model is selected
+ maxLength: 100
+ minLength: 1
+ type: string
+ models:
+ description: Models defines the list of available models in this pool
+ items:
+ description: ModelConfig defines the configuration for a single
+ model
+ properties:
+ loras:
+ description: LoRAs defines the list of LoRA adapters available
+ for this model
+ items:
+ description: LoRAConfig defines a LoRA adapter configuration
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this LoRA adapter
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this LoRA
+ adapter
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 50
+ type: array
+ name:
+ description: Name is the unique identifier for this model
+ maxLength: 100
+ minLength: 1
+ type: string
+ pricing:
+ description: Pricing defines the cost structure for this model
+ properties:
+ inputTokenPrice:
+ description: InputTokenPrice is the cost per input token
+ minimum: 0
+ type: number
+ outputTokenPrice:
+ description: OutputTokenPrice is the cost per output token
+ minimum: 0
+ type: number
+ type: object
+ reasoningFamily:
+ description: |-
+ ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
+ Must be defined in the global static configuration's ReasoningFamilies
+ maxLength: 50
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ required:
+ - defaultModel
+ - models
+ type: object
+ status:
+ description: IntelligentPoolStatus defines the observed state of IntelligentPool
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentPool's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ modelCount:
+ description: ModelCount indicates the number of models in the pool
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentPool
+ format: int64
+ type: integer
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deploy/helm/semantic-router/crds/vllm.ai_intelligentroutes.yaml b/deploy/helm/semantic-router/crds/vllm.ai_intelligentroutes.yaml
new file mode 100644
index 000000000..09beb4579
--- /dev/null
+++ b/deploy/helm/semantic-router/crds/vllm.ai_intelligentroutes.yaml
@@ -0,0 +1,410 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentroutes.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentRoute
+ listKind: IntelligentRouteList
+ plural: intelligentroutes
+ shortNames:
+ - iroute
+ singular: intelligentroute
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Number of decisions
+ jsonPath: .status.statistics.decisions
+ name: Decisions
+ type: integer
+ - description: Number of keyword signals
+ jsonPath: .status.statistics.keywords
+ name: Keywords
+ type: integer
+ - description: Number of embedding signals
+ jsonPath: .status.statistics.embeddings
+ name: Embeddings
+ type: integer
+ - description: Number of domain signals
+ jsonPath: .status.statistics.domains
+ name: Domains
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentRoute defines intelligent routing rules and decisions
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentRouteSpec defines the desired state of IntelligentRoute
+ properties:
+ decisions:
+ description: Decisions defines the routing decisions based on signal
+ combinations
+ items:
+ description: Decision defines a routing decision based on rule combinations
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this decision
+ maxLength: 500
+ type: string
+ modelRefs:
+ description: ModelRefs defines the model references for this
+ decision (currently only one model is supported)
+ items:
+ description: ModelRef defines a model reference without score
+ properties:
+ loraName:
+ description: LoRAName is the name of the LoRA adapter
+ to use (must exist in the model's LoRAs)
+ maxLength: 100
+ type: string
+ model:
+ description: Model is the name of the model (must exist
+ in IntelligentPool)
+ maxLength: 100
+ minLength: 1
+ type: string
+ reasoningDescription:
+ description: ReasoningDescription provides context for
+ when to use reasoning
+ maxLength: 500
+ type: string
+ reasoningEffort:
+ description: ReasoningEffort defines the reasoning effort
+ level (low/medium/high)
+ enum:
+ - low
+ - medium
+ - high
+ type: string
+ useReasoning:
+ default: false
+ description: UseReasoning specifies whether to enable
+ reasoning mode for this model
+ type: boolean
+ required:
+ - model
+ type: object
+ maxItems: 1
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this decision
+ maxLength: 100
+ minLength: 1
+ type: string
+ plugins:
+ description: Plugins defines the plugins to apply for this decision
+ items:
+ description: DecisionPlugin defines a plugin configuration
+ for a decision
+ properties:
+ configuration:
+ description: Configuration is the plugin-specific configuration
+ as a raw JSON object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: Type is the plugin type (semantic-cache,
+ jailbreak, pii, system_prompt, header_mutation)
+ enum:
+ - semantic-cache
+ - jailbreak
+ - pii
+ - system_prompt
+ - header_mutation
+ type: string
+ required:
+ - type
+ type: object
+ maxItems: 10
+ type: array
+ priority:
+ default: 0
+ description: |-
+ Priority defines the priority of this decision (higher values = higher priority)
+ Used when strategy is "priority"
+ format: int32
+ maximum: 1000
+ minimum: 0
+ type: integer
+ signals:
+ description: Signals defines the signal combination logic
+ properties:
+ conditions:
+ description: Conditions defines the list of signal conditions
+ items:
+ description: SignalCondition defines a single signal condition
+ properties:
+ name:
+ description: Name is the name of the signal to reference
+ maxLength: 100
+ minLength: 1
+ type: string
+ type:
+ description: Type defines the type of signal (keyword/embedding/domain)
+ enum:
+ - keyword
+ - embedding
+ - domain
+ type: string
+ required:
+ - name
+ - type
+ type: object
+ maxItems: 50
+ minItems: 1
+ type: array
+ operator:
+ description: Operator defines the logical operator for combining
+ conditions (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - conditions
+ - operator
+ type: object
+ required:
+ - modelRefs
+ - name
+ - signals
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ signals:
+ description: Signals defines signal extraction rules for routing decisions
+ properties:
+ domains:
+ description: Domains defines MMLU domain categories for classification
+ items:
+ description: DomainSignal defines a domain category for classification
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this domain
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this domain
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 14
+ type: array
+ embeddings:
+ description: Embeddings defines embedding-based signal extraction
+ rules
+ items:
+ description: EmbeddingSignal defines an embedding-based signal
+ extraction rule
+ properties:
+ aggregationMethod:
+ default: max
+ description: AggregationMethod defines how to aggregate
+ multiple candidate similarities
+ enum:
+ - mean
+ - max
+ - any
+ type: string
+ candidates:
+ description: Candidates is the list of candidate phrases
+ for semantic matching
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this signal
+ maxLength: 100
+ minLength: 1
+ type: string
+ threshold:
+ description: Threshold is the similarity threshold for matching
+ (0.0-1.0)
+ maximum: 1
+ minimum: 0
+ type: number
+ required:
+ - candidates
+ - name
+ - threshold
+ type: object
+ maxItems: 100
+ type: array
+ keywords:
+ description: Keywords defines keyword-based signal extraction
+ rules
+ items:
+ description: KeywordSignal defines a keyword-based signal extraction
+ rule
+ properties:
+ caseSensitive:
+ default: false
+ description: CaseSensitive specifies whether keyword matching
+ is case-sensitive
+ type: boolean
+ keywords:
+ description: Keywords is the list of keywords to match
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this rule
+ (also used as category name)
+ maxLength: 100
+ minLength: 1
+ type: string
+ operator:
+ description: Operator defines the logical operator for keywords
+ (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - keywords
+ - name
+ - operator
+ type: object
+ maxItems: 100
+ type: array
+ type: object
+ required:
+ - decisions
+ type: object
+ status:
+ description: IntelligentRouteStatus defines the observed state of IntelligentRoute
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentRoute's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentRoute
+ format: int64
+ type: integer
+ statistics:
+ description: Statistics provides statistics about configured decisions
+ and signals
+ properties:
+ decisions:
+ description: Decisions indicates the number of decisions
+ format: int32
+ type: integer
+ domains:
+ description: Domains indicates the number of domain signals
+ format: int32
+ type: integer
+ embeddings:
+ description: Embeddings indicates the number of embedding signals
+ format: int32
+ type: integer
+ keywords:
+ description: Keywords indicates the number of keyword signals
+ format: int32
+ type: integer
+ required:
+ - decisions
+ - domains
+ - embeddings
+ - keywords
+ type: object
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deploy/helm/semantic-router/templates/clusterrole.yaml b/deploy/helm/semantic-router/templates/clusterrole.yaml
new file mode 100644
index 000000000..3bc293fa0
--- /dev/null
+++ b/deploy/helm/semantic-router/templates/clusterrole.yaml
@@ -0,0 +1,44 @@
+{{- if .Values.rbac.create -}}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ include "semantic-router.fullname" . }}
+ labels:
+ {{- include "semantic-router.labels" . | nindent 4 }}
+rules:
+ # Permissions for IntelligentPool CRD
+ - apiGroups:
+ - vllm.ai
+ resources:
+ - intelligentpools
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - vllm.ai
+ resources:
+ - intelligentpools/status
+ verbs:
+ - get
+ - update
+ - patch
+ # Permissions for IntelligentRoute CRD
+ - apiGroups:
+ - vllm.ai
+ resources:
+ - intelligentroutes
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - vllm.ai
+ resources:
+ - intelligentroutes/status
+ verbs:
+ - get
+ - update
+ - patch
+{{- end }}
+
diff --git a/deploy/helm/semantic-router/templates/clusterrolebinding.yaml b/deploy/helm/semantic-router/templates/clusterrolebinding.yaml
new file mode 100644
index 000000000..3b526b3a7
--- /dev/null
+++ b/deploy/helm/semantic-router/templates/clusterrolebinding.yaml
@@ -0,0 +1,17 @@
+{{- if .Values.rbac.create -}}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ include "semantic-router.fullname" . }}
+ labels:
+ {{- include "semantic-router.labels" . | nindent 4 }}
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ include "semantic-router.fullname" . }}
+subjects:
+ - kind: ServiceAccount
+ name: {{ include "semantic-router.serviceAccountName" . }}
+ namespace: {{ include "semantic-router.namespace" . }}
+{{- end }}
+
diff --git a/deploy/helm/semantic-router/templates/deployment.yaml b/deploy/helm/semantic-router/templates/deployment.yaml
index 477cdb216..853876534 100644
--- a/deploy/helm/semantic-router/templates/deployment.yaml
+++ b/deploy/helm/semantic-router/templates/deployment.yaml
@@ -35,8 +35,6 @@ spec:
image: {{ .Values.initContainer.image }}
securityContext:
{{- toYaml .Values.securityContext | nindent 10 }}
- # Allow up to 10 minutes for model downloads in CI environments
- # This prevents the init container from being killed prematurely
command: ["/bin/bash", "-c"]
args:
- |
@@ -53,7 +51,7 @@ spec:
# Remove .cache directory to ensure fresh download
rm -rf "{{ .name }}/.cache" 2>/dev/null || true
# Download with ignore_patterns to exclude ONNX-only files if pytorch model exists
- python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', local_dir_use_symlinks=False, ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
+ python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
# Check for required model files
echo "Checking {{ .name }} for required files:"
diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml
index 926676823..fa5f052c8 100644
--- a/deploy/helm/semantic-router/values.yaml
+++ b/deploy/helm/semantic-router/values.yaml
@@ -37,6 +37,11 @@ serviceAccount:
# -- The name of the service account to use
name: ""
+# RBAC configuration
+rbac:
+ # -- Create RBAC resources (ClusterRole and ClusterRoleBinding)
+ create: true
+
# Pod annotations
podAnnotations: {}
@@ -149,6 +154,9 @@ initContainer:
env: []
# -- Models to download
models:
+ # Embedding models for semantic cache and tools
+ - name: Qwen3-Embedding-0.6B
+ repo: Qwen/Qwen3-Embedding-0.6B
- name: all-MiniLM-L12-v2
repo: sentence-transformers/all-MiniLM-L12-v2
- name: category_classifier_modernbert-base_model
@@ -160,6 +168,7 @@ initContainer:
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model
+
# Autoscaling configuration
autoscaling:
# -- Enable horizontal pod autoscaling
@@ -257,21 +266,6 @@ config:
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
- # vLLM endpoints configuration
- vllm_endpoints:
- - name: "endpoint1"
- address: "172.28.0.20"
- port: 8002
- weight: 1
-
- # Model configuration
- model_config:
- "qwen3":
- reasoning_family: "qwen3"
- preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
-
# Classifier configuration
classifier:
category_model:
@@ -287,102 +281,6 @@ config:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
- # Categories configuration
- categories:
- - name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
- - name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
- - model: qwen3
- score: 0.4
- use_reasoning: false
- - name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
- - name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
- - model: qwen3
- score: 0.9
- use_reasoning: false
- - name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: true
- - name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
- - name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
- - name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
- - name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: false
- - name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
- - model: qwen3
- score: 1.0
- use_reasoning: true
- - name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: true
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
- - model: qwen3
- score: 0.6
- use_reasoning: false
- - name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
- - model: qwen3
- score: 0.5
- use_reasoning: false
- - name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
- - model: qwen3
- score: 0.7
- use_reasoning: false
-
- # Default model
- default_model: "qwen3"
-
# Reasoning families
reasoning_families:
deepseek:
diff --git a/deploy/kserve/configmap-router-config.yaml b/deploy/kserve/configmap-router-config.yaml
index 83328f193..6e73814f9 100644
--- a/deploy/kserve/configmap-router-config.yaml
+++ b/deploy/kserve/configmap-router-config.yaml
@@ -58,7 +58,6 @@ data:
"{{MODEL_NAME}}":
reasoning_family: "qwen3" # Adjust based on model family: qwen3, deepseek, gpt, gpt-oss
preferred_endpoints: ["{{INFERENCESERVICE_NAME}}-endpoint"]
- pii_policy:
allow_by_default: true
pii_types_allowed: ["EMAIL_ADDRESS"]
@@ -77,98 +76,117 @@ data:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
- # Categories with model scoring
+ # Categories define domain metadata only (no routing logic)
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices."
- model_scores:
- - model: {{MODEL_NAME}}
- score: 0.7
- use_reasoning: false
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions."
- model_scores:
- - model: granite32-8b
- score: 0.4
- use_reasoning: false
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92
- model_scores:
- - model: granite32-8b
- score: 0.6
- use_reasoning: false
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology."
- model_scores:
- - model: granite32-8b
- score: 0.9
- use_reasoning: false
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
- - model: granite32-8b
- score: 0.6
- use_reasoning: true
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
- - model: {{MODEL_NAME}}
- score: 0.7
- use_reasoning: false
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75
- model_scores:
- - model: {{MODEL_NAME}}
- score: 0.7
- use_reasoning: false
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: granite32-8b
- score: 0.5
- use_reasoning: false
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory."
- model_scores:
- - model: granite32-8b
- score: 1.0
- use_reasoning: false
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
- - model: granite32-8b
- score: 1.0
- use_reasoning: true
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
- - model: granite32-8b
- score: 0.7
- use_reasoning: true
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
- - model: granite32-8b
- score: 0.6
- use_reasoning: false
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer science"]
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought."
- model_scores:
- - model: granite32-8b
- score: 0.5
- use_reasoning: false
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering."
- model_scores:
- - model: {{MODEL_NAME}}
- score: 0.7
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+ strategy: "priority"
+
+ decisions:
+ - name: "business_decision"
+ description: "Business and management related queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "{{MODEL_NAME}}"
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a senior business consultant and strategic advisor."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "granite32-8b"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "{{MODEL_NAME}}"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful assistant."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
default_model: {{MODEL_NAME}}
diff --git a/deploy/kserve/example-multi-model-config.yaml b/deploy/kserve/example-multi-model-config.yaml
index 4faea00fd..c451ede6a 100644
--- a/deploy/kserve/example-multi-model-config.yaml
+++ b/deploy/kserve/example-multi-model-config.yaml
@@ -70,7 +70,6 @@ data:
"granite32-8b":
reasoning_family: "qwen3"
preferred_endpoints: ["granite32-8b-endpoint"]
- pii_policy:
allow_by_default: true
pii_types_allowed: ["EMAIL_ADDRESS"]
@@ -103,143 +102,117 @@ data:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
- # Category-based routing strategy
- # Higher scores route to that model for the category
+ # Categories define domain metadata only (no routing logic)
categories:
- # Simple categories → small model
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor."
- model_scores:
- - model: granite32-8b
- score: 0.8
- use_reasoning: false
- # - model: granite32-78b
- # score: 0.6
- # use_reasoning: false
-
- - name: other
- system_prompt: "You are a helpful assistant."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75
- model_scores:
- - model: granite32-8b
- score: 1.0
- use_reasoning: false
-
- # Complex reasoning categories → large model
- - name: math
- system_prompt: "You are a mathematics expert."
- model_scores:
- - model: granite32-8b
- score: 0.7
- use_reasoning: true
- # - model: granite32-78b
- # score: 1.0
- # use_reasoning: true
-
- - name: physics
- system_prompt: "You are a physics expert."
- model_scores:
- - model: granite32-8b
- score: 0.7
- use_reasoning: true
- # - model: granite32-78b
- # score: 0.9
- # use_reasoning: true
-
- # Coding → specialized code model
- - name: computer science
- system_prompt: "You are a computer science expert."
- model_scores:
- # - model: granite-code
- # score: 1.0
- # use_reasoning: false
- - model: granite32-8b
- score: 0.8
- use_reasoning: false
- # - model: granite32-78b
- # score: 0.6
- # use_reasoning: false
-
- # Other categories
+ description: "Business and management related queries"
+ mmlu_categories: ["business"]
- name: law
- system_prompt: "You are a knowledgeable legal expert."
- model_scores:
- - model: granite32-8b
- score: 0.5
- use_reasoning: false
- # - model: granite32-78b
- # score: 0.9
- # use_reasoning: false
-
+ description: "Legal questions and law-related topics"
+ mmlu_categories: ["law"]
- name: psychology
- system_prompt: "You are a psychology expert."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92
- model_scores:
- - model: granite32-8b
- score: 0.7
- use_reasoning: false
-
+ description: "Psychology and mental health topics"
+ mmlu_categories: ["psychology"]
- name: biology
- system_prompt: "You are a biology expert."
- model_scores:
- - model: granite32-8b
- score: 0.9
- use_reasoning: false
-
+ description: "Biology and life sciences questions"
+ mmlu_categories: ["biology"]
- name: chemistry
- system_prompt: "You are a chemistry expert."
- model_scores:
- - model: granite32-8b
- score: 0.7
- use_reasoning: true
- # - model: granite32-78b
- # score: 0.9
- # use_reasoning: true
-
+ description: "Chemistry and chemical sciences questions"
+ mmlu_categories: ["chemistry"]
- name: history
- system_prompt: "You are a historian."
- model_scores:
- - model: granite32-8b
- score: 0.8
- use_reasoning: false
-
+ description: "Historical questions and cultural topics"
+ mmlu_categories: ["history"]
+ - name: other
+ description: "General knowledge and miscellaneous topics"
+ mmlu_categories: ["other"]
- name: health
- system_prompt: "You are a health and medical information expert."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: granite32-8b
- score: 0.6
- use_reasoning: false
- # - model: granite32-78b
- # score: 0.8
- # use_reasoning: false
-
+ description: "Health and medical information queries"
+ mmlu_categories: ["health"]
- name: economics
- system_prompt: "You are an economics expert."
- model_scores:
- - model: granite32-8b
- score: 0.9
- use_reasoning: false
-
+ description: "Economics and financial topics"
+ mmlu_categories: ["economics"]
+ - name: math
+ description: "Mathematics and quantitative reasoning"
+ mmlu_categories: ["math"]
+ - name: physics
+ description: "Physics and physical sciences"
+ mmlu_categories: ["physics"]
+ - name: computer_science
+ description: "Computer science and programming"
+ mmlu_categories: ["computer_science"]
- name: philosophy
- system_prompt: "You are a philosophy expert."
- model_scores:
- - model: granite32-8b
- score: 0.6
+ description: "Philosophy and ethical questions"
+ mmlu_categories: ["philosophy"]
+ - name: engineering
+ description: "Engineering and technical problem-solving"
+ mmlu_categories: ["engineering"]
+
+ strategy: "priority"
+
+ decisions:
+ - name: "business_decision"
+ description: "Business and management related queries"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "granite32-8b"
use_reasoning: false
- # - model: granite32-78b
- # score: 0.8
- # use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a senior business consultant and strategic advisor."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
- - name: engineering
- system_prompt: "You are an engineering expert."
- model_scores:
- - model: granite32-8b
- score: 0.8
+ - name: "math_decision"
+ description: "Mathematics and quantitative reasoning"
+ priority: 100
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "granite32-8b"
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a mathematics expert."
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+
+ - name: "general_decision"
+ description: "General knowledge and miscellaneous topics"
+ priority: 50
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "granite32-8b"
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a helpful assistant."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
default_model: granite32-8b
diff --git a/deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml b/deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml
index 4d26a4c23..1b3f4fe36 100644
--- a/deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml
+++ b/deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml
@@ -3,9 +3,6 @@ config:
model_config:
"base-model":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
- # preferred_endpoints omitted - let upstream handle endpoint selection
- pii_policy:
- allow_by_default: false
# Define available LoRA adapters for this base model
# These names must match the LoRA modules registered with vLLM at startup
loras:
@@ -22,121 +19,400 @@ config:
- name: "general-expert"
description: "General-purpose adapter for diverse topics"
- # Categories with LoRA routing
- # Each category uses the base-model model with a specific LoRA adapter
+ # Categories for domain classification (used by domain rules)
+ # Category names are MMLU category names used by the classifier
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: base-model # Base model name (for endpoint selection and PII policy)
- lora_name: social-expert # LoRA adapter name (used as final model name in request)
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
+ description: "Business, corporate strategy, management, finance, marketing"
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
+ description: "Legal principles, case law, statutory interpretation, legal procedures"
+ - name: psychology
+ description: "Cognitive processes, behavioral patterns, mental health, developmental psychology"
+ - name: biology
+ description: "Molecular biology, genetics, cell biology, ecology, evolution, anatomy"
+ - name: chemistry
+ description: "Chemical reactions, molecular structures, laboratory techniques"
+ - name: history
+ description: "Historical events, time periods, cultures, civilizations"
+ - name: health
+ description: "Anatomy, physiology, diseases, treatments, preventive care, nutrition"
+ - name: economics
+ description: "Microeconomics, macroeconomics, financial markets, monetary policy, trade"
+ - name: math
+ description: "Mathematics, algebra, calculus, geometry, statistics"
+ - name: physics
+ description: "Physical laws, mechanics, thermodynamics, electromagnetism, quantum physics"
+ - name: computer science
+ description: "Algorithms, data structures, programming, software engineering"
+ - name: philosophy
+ description: "Philosophical traditions, ethics, logic, metaphysics, epistemology"
+ - name: engineering
+ description: "Engineering disciplines, design, problem-solving, systems"
+ - name: other
+ description: "General knowledge and miscellaneous topics"
+
+ # Decisions with rule-based routing and plugins
+ decisions:
+ - name: business_decision
+ description: "Business and management related queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: base-model
+ lora_name: social-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
+
+ - name: law_decision
+ description: "Legal questions and law-related topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: base-model
lora_name: law-expert
- score: 0.4
use_reasoning: false
- - name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
+
+ - name: psychology_decision
+ description: "Psychology and mental health topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.6
use_reasoning: false
- - name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+
+ - name: biology_decision
+ description: "Biology and life sciences questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.9
use_reasoning: false
- - name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
+
+ - name: chemistry_decision
+ description: "Chemistry and chemical sciences questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
- - name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
+
+ - name: history_decision
+ description: "Historical questions and cultural topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.7
- use_reasoning: false
- - name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
- - model: base-model
- lora_name: general-expert
- score: 0.7
use_reasoning: false
- - name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
+
+ - name: health_decision
+ description: "Health and medical information queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.5
use_reasoning: false
- - name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+
+ - name: economics_decision
+ description: "Economics and financial topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: base-model
lora_name: social-expert
- score: 1.0
use_reasoning: false
- - name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
+
+ - name: math_decision
+ description: "Mathematics and quantitative reasoning"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: base-model
lora_name: math-expert
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
- - name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
+
+ - name: physics_decision
+ description: "Physics and physical sciences"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+
+ - name: computer_science_decision
+ description: "Computer science and programming"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.6
use_reasoning: false
- - name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
+
+ - name: philosophy_decision
+ description: "Philosophy and ethical questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.5
use_reasoning: false
- - name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
+
+ - name: engineering_decision
+ description: "Engineering and technical problem-solving"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.7
use_reasoning: false
- - name: thinking
- system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
- model_scores:
- - model: general-expert
- score: 0.7
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
+
+ - name: thinking_decision
+ description: "Complex reasoning and multi-step thinking"
+ priority: 20
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "keyword"
+ rule_name: "thinking"
+ modelRefs:
+ - model: base-model
+ lora_name: general-expert
use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
+ mode: "replace"
+
+ - name: general_decision
+ description: "General knowledge and miscellaneous topics"
+ priority: 1
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: base-model
+ lora_name: general-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ mode: "replace"
+
+ # Strategy for selecting between multiple matching decisions
+ # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
+ strategy: "priority"
default_model: general-expert
@@ -286,13 +562,12 @@ config:
[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
- # Embedding Models Configuration
+ # Embedding Models Configuration (Optional)
# These models provide intelligent embedding generation with automatic routing:
- # - Qwen3-Embedding-0.6B: Up to 32K context, high quality,
+ # - Qwen3-Embedding-0.6B: Up to 32K context, high quality, 1024-dim embeddings
# - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128)
embedding_models:
qwen3_model_path: "models/Qwen3-Embedding-0.6B"
- gemma_model_path: "models/embeddinggemma-300m"
use_cpu: true # Set to false for GPU acceleration (requires CUDA)
# Observability Configuration
diff --git a/deploy/kubernetes/ai-gateway/semantic-router/config.yaml b/deploy/kubernetes/ai-gateway/semantic-router/config.yaml
index 08308077e..9a0173e91 100644
--- a/deploy/kubernetes/ai-gateway/semantic-router/config.yaml
+++ b/deploy/kubernetes/ai-gateway/semantic-router/config.yaml
@@ -1,9 +1,6 @@
model_config:
"base-model":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
- # preferred_endpoints omitted - let upstream handle endpoint selection
- pii_policy:
- allow_by_default: false
# Define available LoRA adapters for this base model
# These names must match the LoRA modules registered with vLLM at startup
loras:
@@ -20,123 +17,413 @@ model_config:
- name: "general-expert"
description: "General-purpose adapter for diverse topics"
-# Categories with LoRA routing
-# Each category uses the base-model model with a specific LoRA adapter
+default_model: general-expert
+
+# Categories - now only contain metadata for domain classification
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: base-model # Base model name (for endpoint selection and PII policy)
- lora_name: social-expert # LoRA adapter name (used as final model name in request)
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
+ - name: psychology
+ - name: biology
+ - name: chemistry
+ - name: history
+ - name: other
+ - name: health
+ - name: economics
+ - name: math
+ - name: physics
+ - name: computer science
+ - name: philosophy
+ - name: engineering
+ - name: urgent request
+ - name: technical_support
+ - name: product_info
+
+# Decisions - define routing logic with rules, model selection, and plugins
+decisions:
+ - name: business
+ description: "Route business and management queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: base-model
+ lora_name: social-expert
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
+ - name: law
+ description: "Route legal queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: base-model
lora_name: law-expert
- score: 0.4
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
+ description: "Route psychology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
+ description: "Route biology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.9
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
+ description: "Route chemistry queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
+ description: "Route history queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
- model: base-model
lora_name: general-expert
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
+ description: "Route health and medical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
+ description: "Route economics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: base-model
lora_name: social-expert
- score: 1.0
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
+ description: "Route mathematics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: base-model
lora_name: math-expert
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+ - name: computer_science
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
+ description: "Route philosophy queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: base-model
lora_name: humanities-expert
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
+ description: "Route engineering queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: base-model
lora_name: science-expert
- score: 0.7
use_reasoning: false
- - name: thinking
- system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
- model_scores:
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
+ - name: urgent_request
+ description: "Route urgent and emergency requests"
+ priority: 20
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "urgent request"
+ modelRefs:
+ - model: general-expert
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
+ mode: "replace"
+ - name: technical_support
+ description: "Route technical support queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "technical_support"
+ modelRefs:
- model: general-expert
- score: 0.7
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps."
+ mode: "replace"
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: true
+ - name: product_info
+ description: "Route product information queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "product_info"
+ modelRefs:
+ - model: general-expert
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
+ mode: "replace"
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ - type: "pii"
+ configuration:
+ enabled: true
-default_model: general-expert
+keyword_rules:
+ # Keyword Rule 1: Emergency/Urgent Requests
+ # Use case: Fast routing for time-sensitive queries that need immediate attention
+ # Examples: "URGENT: server down", "EMERGENCY: data loss", "CRITICAL: security breach"
+ - name: "urgent request"
+ operator: "OR"
+ keywords: ["urgent", "emergency", "critical", "asap", "immediately", "help!", "sos"]
+ case_sensitive: false
+
+ # Keyword Rule 2: Programming Language Detection
+ # Use case: Route code-related queries to appropriate handlers based on language
+ # Examples: "python error", "java exception", "golang panic", "rust compiler error"
+ - name: "computer science"
+ operator: "OR"
+ keywords: ["python", "java", "golang", "rust", "javascript", "typescript", "c++", "ruby", "php"]
+ case_sensitive: false
+
+# Embedding-based classification rules
+# These rules use semantic similarity between query text and candidates
+embedding_rules:
+ # Embedding Rule 1: Customer Complaint/Feedback Detection
+ # Use case: Identify negative sentiment and complaints regardless of exact wording
+ # Examples: "I'm disappointed with the service", "This product doesn't work as expected",
+ # "Not satisfied with my purchase", "The quality is poor"
+ - name: "technical_support"
+ threshold: 0.72
+ candidates:
+ - "I'm not satisfied with the product quality"
+ - "The service didn't meet my expectations"
+ - "I'm experiencing issues and need help"
+ - "Something is broken and not working properly"
+ - "I'm disappointed with the performance"
+ - "This is not what I expected when I ordered"
+ aggregation_method: "max" # Use max to catch any strong complaint signal
+ model: "auto"
+ dimension: 768
+ quality_priority: 0.8 # High quality needed for sentiment detection
+ latency_priority: 0.2
+
+ # Embedding Rule 2: Account/Billing Related Queries
+ # Use case: Route financial and account queries even with varied phrasing
+ # Examples: "How much do I owe?", "Check my balance", "Update payment method",
+ # "Why was I charged twice?", "Cancel my subscription"
+ - name: "product_info"
+ threshold: 0.68
+ candidates:
+ - "I need to check my account balance and payment history"
+ - "How can I update my billing information and payment method"
+ - "I was charged incorrectly and need a refund"
+ - "I want to cancel my subscription and stop recurring payments"
+ - "What are the fees and charges on my account"
+ - "I need to review my invoice and transaction details"
+ aggregation_method: "avg" # Use avg for balanced matching across billing topics
+ model: "qwen3" # Use high-quality model for financial queries
+ dimension: 1024
bert_model:
model_id: models/all-MiniLM-L12-v2
@@ -195,12 +482,6 @@ classifier:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-keyword_rules:
- - category: "thinking"
- operator: "OR"
- keywords: ["urgent", "immediate", "asap", "think", "careful"]
- case_sensitive: false
-
# Router Configuration for Dual-Path Selection
router:
diff --git a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml
index 0aadde88d..e853beb8b 100644
--- a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml
+++ b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml
@@ -3,9 +3,6 @@ config:
model_config:
"vllm-llama3-8b-instruct":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
- # preferred_endpoints omitted - let upstream handle endpoint selection
- pii_policy:
- allow_by_default: false
# Define available LoRA adapters for this base model
# These names must match the LoRA modules registered with vLLM at startup
loras:
diff --git a/deploy/kubernetes/aibrix/semantic-router/config.yaml b/deploy/kubernetes/aibrix/semantic-router/config.yaml
index fadfecb65..a2d7bab1a 100644
--- a/deploy/kubernetes/aibrix/semantic-router/config.yaml
+++ b/deploy/kubernetes/aibrix/semantic-router/config.yaml
@@ -1,9 +1,6 @@
model_config:
"vllm-llama3-8b-instruct":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
- # preferred_endpoints omitted - let upstream handle endpoint selection
- pii_policy:
- allow_by_default: false
# Define available LoRA adapters for this base model
# These names must match the LoRA modules registered with vLLM at startup
loras:
@@ -20,107 +17,293 @@ model_config:
- name: "general-expert"
description: "General-purpose adapter for diverse topics"
-# Categories with LoRA routing
-# Each category uses the vllm-llama3-8b-instruct model with a specific LoRA adapter
+# Categories - now only contain metadata for domain classification
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
- - model: vllm-llama3-8b-instruct # Base model name (for endpoint selection and PII policy)
- score: 0.7
- use_reasoning: false # Business performs better without reasoning
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
+ - name: psychology
+ - name: biology
+ - name: chemistry
+ - name: history
+ - name: other
+ - name: health
+ - name: economics
+ - name: math
+ - name: physics
+ - name: computer science
+ - name: philosophy
+ - name: engineering
+ - name: thinking
+
+# Decisions - define routing logic with rules, model selection, and plugins
+decisions:
+ - name: business
+ description: "Route business and management queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.4
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
+ - name: law
+ description: "Route legal queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: vllm-llama3-8b-instruct
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
+ description: "Route psychology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
+ description: "Route biology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.9
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
+ description: "Route chemistry queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.6
- use_reasoning: true # Enable reasoning for complex chemistry
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
+ description: "Route history queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
+ description: "Route health and medical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
+ description: "Route economics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 1.0
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
+ description: "Route mathematics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 1.0
- use_reasoning: true # Enable reasoning for complex math
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.7
- use_reasoning: true # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+ - name: computer_science
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
+ description: "Route philosophy queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
+ description: "Route engineering queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
- name: thinking
- system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
- model_scores:
+ description: "Route thinking and reasoning queries"
+ priority: 15
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "thinking"
+ modelRefs:
- model: vllm-llama3-8b-instruct
- score: 0.7
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a thinking expert who should think through multiple steps before answering. Please answer the question step by step."
+ mode: "replace"
default_model: vllm-llama3-8b-instruct
@@ -182,7 +365,7 @@ classifier:
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
keyword_rules:
- - category: "thinking"
+ - name: "thinking"
operator: "OR"
keywords: ["urgent", "immediate", "asap", "think", "careful"]
case_sensitive: false
diff --git a/deploy/kubernetes/crds/vllm.ai_intelligentpools.yaml b/deploy/kubernetes/crds/vllm.ai_intelligentpools.yaml
new file mode 100644
index 000000000..7ffbd0a24
--- /dev/null
+++ b/deploy/kubernetes/crds/vllm.ai_intelligentpools.yaml
@@ -0,0 +1,202 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentpools.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentPool
+ listKind: IntelligentPoolList
+ plural: intelligentpools
+ shortNames:
+ - ipool
+ singular: intelligentpool
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Default model name
+ jsonPath: .spec.defaultModel
+ name: Default Model
+ type: string
+ - description: Number of models
+ jsonPath: .status.modelCount
+ name: Models
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentPool defines a pool of models with their configurations
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentPoolSpec defines the desired state of IntelligentPool
+ properties:
+ defaultModel:
+ description: DefaultModel specifies the default model to use when
+ no specific model is selected
+ maxLength: 100
+ minLength: 1
+ type: string
+ models:
+ description: Models defines the list of available models in this pool
+ items:
+ description: ModelConfig defines the configuration for a single
+ model
+ properties:
+ loras:
+ description: LoRAs defines the list of LoRA adapters available
+ for this model
+ items:
+ description: LoRAConfig defines a LoRA adapter configuration
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this LoRA adapter
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this LoRA
+ adapter
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 50
+ type: array
+ name:
+ description: Name is the unique identifier for this model
+ maxLength: 100
+ minLength: 1
+ type: string
+ pricing:
+ description: Pricing defines the cost structure for this model
+ properties:
+ inputTokenPrice:
+ description: InputTokenPrice is the cost per input token
+ minimum: 0
+ type: number
+ outputTokenPrice:
+ description: OutputTokenPrice is the cost per output token
+ minimum: 0
+ type: number
+ type: object
+ reasoningFamily:
+ description: |-
+ ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
+ Must be defined in the global static configuration's ReasoningFamilies
+ maxLength: 50
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ required:
+ - defaultModel
+ - models
+ type: object
+ status:
+ description: IntelligentPoolStatus defines the observed state of IntelligentPool
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentPool's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ modelCount:
+ description: ModelCount indicates the number of models in the pool
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentPool
+ format: int64
+ type: integer
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deploy/kubernetes/crds/vllm.ai_intelligentroutes.yaml b/deploy/kubernetes/crds/vllm.ai_intelligentroutes.yaml
new file mode 100644
index 000000000..09beb4579
--- /dev/null
+++ b/deploy/kubernetes/crds/vllm.ai_intelligentroutes.yaml
@@ -0,0 +1,410 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentroutes.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentRoute
+ listKind: IntelligentRouteList
+ plural: intelligentroutes
+ shortNames:
+ - iroute
+ singular: intelligentroute
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Number of decisions
+ jsonPath: .status.statistics.decisions
+ name: Decisions
+ type: integer
+ - description: Number of keyword signals
+ jsonPath: .status.statistics.keywords
+ name: Keywords
+ type: integer
+ - description: Number of embedding signals
+ jsonPath: .status.statistics.embeddings
+ name: Embeddings
+ type: integer
+ - description: Number of domain signals
+ jsonPath: .status.statistics.domains
+ name: Domains
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentRoute defines intelligent routing rules and decisions
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentRouteSpec defines the desired state of IntelligentRoute
+ properties:
+ decisions:
+ description: Decisions defines the routing decisions based on signal
+ combinations
+ items:
+ description: Decision defines a routing decision based on rule combinations
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this decision
+ maxLength: 500
+ type: string
+ modelRefs:
+ description: ModelRefs defines the model references for this
+ decision (currently only one model is supported)
+ items:
+ description: ModelRef defines a model reference without score
+ properties:
+ loraName:
+ description: LoRAName is the name of the LoRA adapter
+ to use (must exist in the model's LoRAs)
+ maxLength: 100
+ type: string
+ model:
+ description: Model is the name of the model (must exist
+ in IntelligentPool)
+ maxLength: 100
+ minLength: 1
+ type: string
+ reasoningDescription:
+ description: ReasoningDescription provides context for
+ when to use reasoning
+ maxLength: 500
+ type: string
+ reasoningEffort:
+ description: ReasoningEffort defines the reasoning effort
+ level (low/medium/high)
+ enum:
+ - low
+ - medium
+ - high
+ type: string
+ useReasoning:
+ default: false
+ description: UseReasoning specifies whether to enable
+ reasoning mode for this model
+ type: boolean
+ required:
+ - model
+ type: object
+ maxItems: 1
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this decision
+ maxLength: 100
+ minLength: 1
+ type: string
+ plugins:
+ description: Plugins defines the plugins to apply for this decision
+ items:
+ description: DecisionPlugin defines a plugin configuration
+ for a decision
+ properties:
+ configuration:
+ description: Configuration is the plugin-specific configuration
+ as a raw JSON object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: Type is the plugin type (semantic-cache,
+ jailbreak, pii, system_prompt, header_mutation)
+ enum:
+ - semantic-cache
+ - jailbreak
+ - pii
+ - system_prompt
+ - header_mutation
+ type: string
+ required:
+ - type
+ type: object
+ maxItems: 10
+ type: array
+ priority:
+ default: 0
+ description: |-
+ Priority defines the priority of this decision (higher values = higher priority)
+ Used when strategy is "priority"
+ format: int32
+ maximum: 1000
+ minimum: 0
+ type: integer
+ signals:
+ description: Signals defines the signal combination logic
+ properties:
+ conditions:
+ description: Conditions defines the list of signal conditions
+ items:
+ description: SignalCondition defines a single signal condition
+ properties:
+ name:
+ description: Name is the name of the signal to reference
+ maxLength: 100
+ minLength: 1
+ type: string
+ type:
+ description: Type defines the type of signal (keyword/embedding/domain)
+ enum:
+ - keyword
+ - embedding
+ - domain
+ type: string
+ required:
+ - name
+ - type
+ type: object
+ maxItems: 50
+ minItems: 1
+ type: array
+ operator:
+ description: Operator defines the logical operator for combining
+ conditions (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - conditions
+ - operator
+ type: object
+ required:
+ - modelRefs
+ - name
+ - signals
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ signals:
+ description: Signals defines signal extraction rules for routing decisions
+ properties:
+ domains:
+ description: Domains defines MMLU domain categories for classification
+ items:
+ description: DomainSignal defines a domain category for classification
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this domain
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this domain
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 14
+ type: array
+ embeddings:
+ description: Embeddings defines embedding-based signal extraction
+ rules
+ items:
+ description: EmbeddingSignal defines an embedding-based signal
+ extraction rule
+ properties:
+ aggregationMethod:
+ default: max
+ description: AggregationMethod defines how to aggregate
+ multiple candidate similarities
+ enum:
+ - mean
+ - max
+ - any
+ type: string
+ candidates:
+ description: Candidates is the list of candidate phrases
+ for semantic matching
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this signal
+ maxLength: 100
+ minLength: 1
+ type: string
+ threshold:
+ description: Threshold is the similarity threshold for matching
+ (0.0-1.0)
+ maximum: 1
+ minimum: 0
+ type: number
+ required:
+ - candidates
+ - name
+ - threshold
+ type: object
+ maxItems: 100
+ type: array
+ keywords:
+ description: Keywords defines keyword-based signal extraction
+ rules
+ items:
+ description: KeywordSignal defines a keyword-based signal extraction
+ rule
+ properties:
+ caseSensitive:
+ default: false
+ description: CaseSensitive specifies whether keyword matching
+ is case-sensitive
+ type: boolean
+ keywords:
+ description: Keywords is the list of keywords to match
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this rule
+ (also used as category name)
+ maxLength: 100
+ minLength: 1
+ type: string
+ operator:
+ description: Operator defines the logical operator for keywords
+ (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - keywords
+ - name
+ - operator
+ type: object
+ maxItems: 100
+ type: array
+ type: object
+ required:
+ - decisions
+ type: object
+ status:
+ description: IntelligentRouteStatus defines the observed state of IntelligentRoute
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentRoute's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentRoute
+ format: int64
+ type: integer
+ statistics:
+ description: Statistics provides statistics about configured decisions
+ and signals
+ properties:
+ decisions:
+ description: Decisions indicates the number of decisions
+ format: int32
+ type: integer
+ domains:
+ description: Domains indicates the number of domain signals
+ format: int32
+ type: integer
+ embeddings:
+ description: Embeddings indicates the number of embedding signals
+ format: int32
+ type: integer
+ keywords:
+ description: Keywords indicates the number of keyword signals
+ format: int32
+ type: integer
+ required:
+ - decisions
+ - domains
+ - embeddings
+ - keywords
+ type: object
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml b/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml
deleted file mode 100644
index c943e699b..000000000
--- a/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml
+++ /dev/null
@@ -1,293 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- annotations:
- controller-gen.kubebuilder.io/version: v0.19.0
- name: semanticroutes.vllm.ai
-spec:
- group: vllm.ai
- names:
- kind: SemanticRoute
- listKind: SemanticRouteList
- plural: semanticroutes
- shortNames:
- - sr
- singular: semanticroute
- scope: Namespaced
- versions:
- - additionalPrinterColumns:
- - description: Number of routing rules
- jsonPath: .spec.rules
- name: Rules
- type: integer
- - jsonPath: .metadata.creationTimestamp
- name: Age
- type: date
- name: v1alpha1
- schema:
- openAPIV3Schema:
- description: SemanticRoute defines a semantic routing rule for LLM requests
- properties:
- apiVersion:
- description: |-
- APIVersion defines the versioned schema of this representation of an object.
- Servers should convert recognized schemas to the latest internal value, and
- may reject unrecognized values.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
- type: string
- kind:
- description: |-
- Kind is a string value representing the REST resource this object represents.
- Servers may infer this from the endpoint the client submits requests to.
- Cannot be updated.
- In CamelCase.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- metadata:
- type: object
- spec:
- description: SemanticRouteSpec defines the desired state of SemanticRoute
- properties:
- rules:
- description: Rules defines the routing rules to be applied
- items:
- description: RouteRule defines a single routing rule
- properties:
- defaultModel:
- description: DefaultModel defines the fallback model if no modelRefs
- are available
- properties:
- address:
- description: Address defines the endpoint address
- maxLength: 255
- minLength: 1
- type: string
- modelName:
- description: ModelName defines the name of the model
- maxLength: 100
- minLength: 1
- type: string
- port:
- description: Port defines the endpoint port
- format: int32
- maximum: 65535
- minimum: 1
- type: integer
- priority:
- description: Priority defines the priority of this model
- reference (higher values = higher priority)
- format: int32
- maximum: 1000
- minimum: 0
- type: integer
- weight:
- default: 100
- description: Weight defines the traffic weight for this
- model (0-100)
- format: int32
- maximum: 100
- minimum: 0
- type: integer
- required:
- - address
- - modelName
- - port
- type: object
- filters:
- description: Filters defines the optional filters to be applied
- to requests matching this rule
- items:
- description: Filter defines a filter to be applied to requests
- properties:
- config:
- description: Config defines the filter-specific configuration
- type: object
- x-kubernetes-preserve-unknown-fields: true
- enabled:
- default: true
- description: Enabled defines whether this filter is enabled
- type: boolean
- type:
- allOf:
- - enum:
- - PIIDetection
- - PromptGuard
- - SemanticCache
- - ReasoningControl
- - ToolSelection
- - enum:
- - PIIDetection
- - PromptGuard
- - SemanticCache
- - ReasoningControl
- description: Type defines the filter type
- type: string
- required:
- - type
- type: object
- maxItems: 20
- type: array
- intents:
- description: Intents defines the intent categories that this
- rule should match
- items:
- description: Intent defines an intent category for routing
- properties:
- category:
- description: Category defines the intent category name
- (e.g., "math", "computer science", "creative")
- maxLength: 100
- minLength: 1
- pattern: ^[a-zA-Z0-9\s\-_]+$
- type: string
- description:
- description: Description provides an optional description
- of this intent category
- maxLength: 500
- type: string
- threshold:
- default: 0.7
- description: Threshold defines the confidence threshold
- for this intent (0.0-1.0)
- maximum: 1
- minimum: 0
- type: number
- required:
- - category
- type: object
- maxItems: 50
- minItems: 1
- type: array
- modelRefs:
- description: ModelRefs defines the target models for this routing
- rule
- items:
- description: ModelRef defines a reference to a model endpoint
- properties:
- address:
- description: Address defines the endpoint address
- maxLength: 255
- minLength: 1
- type: string
- modelName:
- description: ModelName defines the name of the model
- maxLength: 100
- minLength: 1
- type: string
- port:
- description: Port defines the endpoint port
- format: int32
- maximum: 65535
- minimum: 1
- type: integer
- priority:
- description: Priority defines the priority of this model
- reference (higher values = higher priority)
- format: int32
- maximum: 1000
- minimum: 0
- type: integer
- weight:
- default: 100
- description: Weight defines the traffic weight for this
- model (0-100)
- format: int32
- maximum: 100
- minimum: 0
- type: integer
- required:
- - address
- - modelName
- - port
- type: object
- maxItems: 10
- minItems: 1
- type: array
- required:
- - intents
- - modelRefs
- type: object
- maxItems: 100
- minItems: 1
- type: array
- required:
- - rules
- type: object
- status:
- description: SemanticRouteStatus defines the observed state of SemanticRoute
- properties:
- activeRules:
- description: ActiveRules indicates the number of currently active
- routing rules
- format: int32
- type: integer
- conditions:
- description: Conditions represent the latest available observations
- of the SemanticRoute's current state
- items:
- description: Condition contains details for one aspect of the current
- state of this API Resource.
- properties:
- lastTransitionTime:
- description: |-
- lastTransitionTime is the last time the condition transitioned from one status to another.
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
- format: date-time
- type: string
- message:
- description: |-
- message is a human readable message indicating details about the transition.
- This may be an empty string.
- maxLength: 32768
- type: string
- observedGeneration:
- description: |-
- observedGeneration represents the .metadata.generation that the condition was set based upon.
- For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
- with respect to the current state of the instance.
- format: int64
- minimum: 0
- type: integer
- reason:
- description: |-
- reason contains a programmatic identifier indicating the reason for the condition's last transition.
- Producers of specific condition types may define expected values and meanings for this field,
- and whether the values are considered a guaranteed API.
- The value should be a CamelCase string.
- This field may not be empty.
- maxLength: 1024
- minLength: 1
- pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
- type: string
- status:
- description: status of the condition, one of True, False, Unknown.
- enum:
- - "True"
- - "False"
- - Unknown
- type: string
- type:
- description: type of condition in CamelCase or in foo.example.com/CamelCase.
- maxLength: 316
- pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
- type: string
- required:
- - lastTransitionTime
- - message
- - reason
- - status
- - type
- type: object
- type: array
- observedGeneration:
- description: ObservedGeneration reflects the generation of the most
- recently observed SemanticRoute
- format: int64
- type: integer
- type: object
- type: object
- served: true
- storage: true
- subresources:
- status: {}
diff --git a/deploy/kubernetes/istio/config.yaml b/deploy/kubernetes/istio/config.yaml
index 0e4cdbca1..8f0fd1a40 100644
--- a/deploy/kubernetes/istio/config.yaml
+++ b/deploy/kubernetes/istio/config.yaml
@@ -47,13 +47,11 @@ model_config:
"llama3-8b":
# reasoning_family: "" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
+ allow_by_default: true
"phi4-mini":
# reasoning_family: "" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint2"]
- pii_policy:
- allow_by_default: true
+ allow_by_default: true
# Classifier configuration
classifier:
@@ -70,103 +68,275 @@ classifier:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-# Categories with new use_reasoning field structure
+# Categories - now only contain metadata for domain classification
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
- # jailbreak_enabled: true # Optional: Override global jailbreak detection per category
- # jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
- model_scores:
+ - name: law
+ - name: psychology
+ - name: biology
+ - name: chemistry
+ - name: history
+ - name: other
+ - name: health
+ - name: economics
+ - name: math
+ - name: physics
+ - name: computer science
+ - name: philosophy
+ - name: engineering
+
+# Decisions - define routing logic with rules, model selection, and plugins
+decisions:
+ - name: business
+ description: "Route business and management queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: llama3-8b
- score: 0.8
- use_reasoning: false # Business performs better without reasoning
- - model: phi4-mini
- score: 0.3
- use_reasoning: false # Business performs better without reasoning
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
- model_scores:
+ description: "Route legal queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: llama3-8b
- score: 0.4
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
- model_scores:
+ description: "Route psychology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: llama3-8b
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
- model_scores:
+ description: "Route biology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: llama3-8b
- score: 0.9
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
- model_scores:
+ description: "Route chemistry queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: llama3-8b
- score: 0.6
- use_reasoning: false # Enable reasoning for complex chemistry
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
- model_scores:
+ description: "Route history queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: llama3-8b
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
- model_scores:
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
- model: llama3-8b
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
- model_scores:
+ description: "Route health and medical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: llama3-8b
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
- model_scores:
+ description: "Route economics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: llama3-8b
- score: 1.0
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
- model_scores:
+ description: "Route mathematics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: phi4-mini
- score: 1.0
- use_reasoning: false # Enable reasoning for complex math
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
- model_scores:
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: llama3-8b
- score: 0.7
- use_reasoning: false # Enable reasoning for physics
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
- model_scores:
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+ - name: computer_science
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: llama3-8b
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
- model_scores:
+ description: "Route philosophy queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: llama3-8b
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
- name: engineering
- system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
- model_scores:
+ description: "Route engineering queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: llama3-8b
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
default_model: "llama3-8b"
diff --git a/deploy/kubernetes/observability/dashboard/config.yaml b/deploy/kubernetes/observability/dashboard/config.yaml
index ccb9e9d60..e5988a465 100644
--- a/deploy/kubernetes/observability/dashboard/config.yaml
+++ b/deploy/kubernetes/observability/dashboard/config.yaml
@@ -40,8 +40,7 @@ model_config:
"qwen3":
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
preferred_endpoints: ["endpoint1"]
- pii_policy:
- allow_by_default: true
+ allow_by_default: true
# Classifier configuration
classifier:
diff --git a/deploy/openshift/config-openshift.yaml b/deploy/openshift/config-openshift.yaml
index 3a0e5c760..7045345f5 100644
--- a/deploy/openshift/config-openshift.yaml
+++ b/deploy/openshift/config-openshift.yaml
@@ -44,15 +44,9 @@ model_config:
"Model-A":
reasoning_family: "qwen3"
preferred_endpoints: ["model-a-endpoint"]
- pii_policy:
- allow_by_default: false
- pii_types_allowed: ["EMAIL_ADDRESS"]
"Model-B":
reasoning_family: "qwen3"
preferred_endpoints: ["model-b-endpoint"]
- pii_policy:
- allow_by_default: false
- pii_types_allowed: ["EMAIL_ADDRESS"]
# Classifier configuration
classifier:
@@ -69,98 +63,281 @@ classifier:
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-# Categories
+# Categories - now only contain metadata for domain classification
categories:
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development."
- model_scores:
+ - name: law
+ - name: psychology
+ - name: biology
+ - name: chemistry
+ - name: history
+ - name: other
+ - name: health
+ - name: economics
+ - name: math
+ - name: physics
+ - name: computer science
+ - name: philosophy
+ - name: engineering
+ - name: general
+
+# Decisions - define routing logic with rules, model selection, and plugins
+decisions:
+ - name: business
+ description: "Route business and management queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: Model-B
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development."
+ mode: "replace"
- name: law
- system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures."
- model_scores:
+ description: "Route legal queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: Model-B
- score: 0.4
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures."
+ mode: "replace"
- name: psychology
- system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, and therapeutic approaches."
- model_scores:
+ description: "Route psychology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: Model-B
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, and therapeutic approaches."
+ mode: "replace"
- name: biology
- system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, and evolution."
- model_scores:
+ description: "Route biology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
- model: Model-A
- score: 0.9
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, and evolution."
+ mode: "replace"
- name: chemistry
- system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques."
- model_scores:
+ description: "Route chemistry queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
- model: Model-A
- score: 0.6
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques."
+ mode: "replace"
- name: history
- system_prompt: "You are a historian with expertise across different time periods and cultures."
- model_scores:
+ description: "Route history queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
- model: Model-A
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures."
+ mode: "replace"
- name: other
- system_prompt: "You are a helpful and knowledgeable assistant."
- model_scores:
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
- model: Model-A
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant."
+ mode: "replace"
- name: health
- system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, and treatments."
- model_scores:
+ description: "Route health and medical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: Model-B
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, and treatments."
+ mode: "replace"
- name: economics
- system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, and financial markets."
- model_scores:
+ description: "Route economics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
- model: Model-A
- score: 1.0
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, and financial markets."
+ mode: "replace"
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions and explain mathematical concepts clearly."
- model_scores:
+ description: "Route mathematics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: Model-A
- score: 1.0
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions and explain mathematical concepts clearly."
+ mode: "replace"
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena."
- model_scores:
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: Model-A
- score: 0.7
use_reasoning: true
- - name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, and software engineering."
- model_scores:
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena."
+ mode: "replace"
+ - name: computer_science
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: Model-A
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, and software engineering."
+ mode: "replace"
- name: philosophy
- system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions and ethical theories."
- model_scores:
+ description: "Route philosophy queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
- model: Model-B
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions and ethical theories."
+ mode: "replace"
- name: engineering
- system_prompt: "You are an engineering expert across multiple disciplines."
- model_scores:
+ description: "Route engineering queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: Model-A
- score: 0.8
use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert across multiple disciplines."
+ mode: "replace"
- name: general
- system_prompt: "You are a knowledgeable assistant for general questions."
- model_scores:
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "general"
+ modelRefs:
- model: Model-A
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable assistant for general questions."
+ mode: "replace"
# Observability
observability:
diff --git a/e2e/cmd/e2e/main.go b/e2e/cmd/e2e/main.go
index 6c04953a8..a7f8b53eb 100644
--- a/e2e/cmd/e2e/main.go
+++ b/e2e/cmd/e2e/main.go
@@ -10,6 +10,7 @@ import (
"github.com/vllm-project/semantic-router/e2e/pkg/banner"
"github.com/vllm-project/semantic-router/e2e/pkg/framework"
aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
+ dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
// Import profiles to register test cases
_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
@@ -96,6 +97,8 @@ func getProfile(name string) (framework.Profile, error) {
switch name {
case "ai-gateway":
return aigateway.NewProfile(), nil
+ case "dynamic-config":
+ return dynamicconfig.NewProfile(), nil
// Add more profiles here as they are implemented
// case "istio":
// return istio.NewProfile(), nil
diff --git a/e2e/profiles/ai-gateway/profile.go b/e2e/profiles/ai-gateway/profile.go
index d5bf09b2f..46b447d99 100644
--- a/e2e/profiles/ai-gateway/profile.go
+++ b/e2e/profiles/ai-gateway/profile.go
@@ -129,7 +129,7 @@ func (p *Profile) GetServiceConfig() framework.ServiceConfig {
func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error {
// Use local Helm chart instead of remote OCI registry
chartPath := "deploy/helm/semantic-router"
- valuesFile := "deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml"
+ valuesFile := "e2e/profiles/ai-gateway/values.yaml"
// Override image to use locally built image
imageRepo := "ghcr.io/vllm-project/semantic-router/extproc"
@@ -146,7 +146,7 @@ func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deplo
"image.pullPolicy": "Never", // Use local image, don't pull from registry
},
Wait: true,
- Timeout: "10m",
+ Timeout: "30m",
}
if err := deployer.Install(ctx, installOpts); err != nil {
@@ -164,14 +164,14 @@ func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deploye
Version: "v0.0.0-latest",
ValuesFiles: []string{"https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml"},
Wait: true,
- Timeout: "5m",
+ Timeout: "10m",
}
if err := deployer.Install(ctx, installOpts); err != nil {
return err
}
- return deployer.WaitForDeployment(ctx, "envoy-gateway-system", "envoy-gateway", 5*time.Minute)
+ return deployer.WaitForDeployment(ctx, "envoy-gateway-system", "envoy-gateway", 10*time.Minute)
}
func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deployer, _ *framework.SetupOptions) error {
@@ -182,7 +182,7 @@ func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deplo
Namespace: "envoy-ai-gateway-system",
Version: "v0.0.0-latest",
Wait: true,
- Timeout: "5m",
+ Timeout: "10m",
}
if err := deployer.Install(ctx, crdOpts); err != nil {
@@ -196,14 +196,14 @@ func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deplo
Namespace: "envoy-ai-gateway-system",
Version: "v0.0.0-latest",
Wait: true,
- Timeout: "5m",
+ Timeout: "10m",
}
if err := deployer.Install(ctx, installOpts); err != nil {
return err
}
- return deployer.WaitForDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller", 5*time.Minute)
+ return deployer.WaitForDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller", 10*time.Minute)
}
func (p *Profile) deployGatewayResources(ctx context.Context, opts *framework.SetupOptions) error {
diff --git a/e2e/profiles/ai-gateway/values.yaml b/e2e/profiles/ai-gateway/values.yaml
new file mode 100644
index 000000000..1b3f4fe36
--- /dev/null
+++ b/e2e/profiles/ai-gateway/values.yaml
@@ -0,0 +1,589 @@
+# Semantic Router Configuration for AI Gateway
+config:
+ model_config:
+ "base-model":
+ reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
+ # Define available LoRA adapters for this base model
+ # These names must match the LoRA modules registered with vLLM at startup
+ loras:
+ - name: "science-expert"
+ description: "Specialized for science domains: biology, chemistry, physics, health, engineering"
+ - name: "social-expert"
+ description: "Optimized for social sciences: business, economics"
+ - name: "math-expert"
+ description: "Fine-tuned for mathematics and quantitative reasoning"
+ - name: "law-expert"
+ description: "Specialized for legal questions and law-related topics"
+ - name: "humanities-expert"
+ description: "Optimized for humanities: psychology, history, philosophy"
+ - name: "general-expert"
+ description: "General-purpose adapter for diverse topics"
+
+ # Categories for domain classification (used by domain rules)
+ # Category names are MMLU category names used by the classifier
+ categories:
+ - name: business
+ description: "Business, corporate strategy, management, finance, marketing"
+ - name: law
+ description: "Legal principles, case law, statutory interpretation, legal procedures"
+ - name: psychology
+ description: "Cognitive processes, behavioral patterns, mental health, developmental psychology"
+ - name: biology
+ description: "Molecular biology, genetics, cell biology, ecology, evolution, anatomy"
+ - name: chemistry
+ description: "Chemical reactions, molecular structures, laboratory techniques"
+ - name: history
+ description: "Historical events, time periods, cultures, civilizations"
+ - name: health
+ description: "Anatomy, physiology, diseases, treatments, preventive care, nutrition"
+ - name: economics
+ description: "Microeconomics, macroeconomics, financial markets, monetary policy, trade"
+ - name: math
+ description: "Mathematics, algebra, calculus, geometry, statistics"
+ - name: physics
+ description: "Physical laws, mechanics, thermodynamics, electromagnetism, quantum physics"
+ - name: computer science
+ description: "Algorithms, data structures, programming, software engineering"
+ - name: philosophy
+ description: "Philosophical traditions, ethics, logic, metaphysics, epistemology"
+ - name: engineering
+ description: "Engineering disciplines, design, problem-solving, systems"
+ - name: other
+ description: "General knowledge and miscellaneous topics"
+
+ # Decisions with rule-based routing and plugins
+ decisions:
+ - name: business_decision
+ description: "Business and management related queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: base-model
+ lora_name: social-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
+
+ - name: law_decision
+ description: "Legal questions and law-related topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: base-model
+ lora_name: law-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
+
+ - name: psychology_decision
+ description: "Psychology and mental health topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
+ - model: base-model
+ lora_name: humanities-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+
+ - name: biology_decision
+ description: "Biology and life sciences questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
+
+ - name: chemistry_decision
+ description: "Chemistry and chemical sciences questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
+
+ - name: history_decision
+ description: "Historical questions and cultural topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
+ - model: base-model
+ lora_name: humanities-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
+
+ - name: health_decision
+ description: "Health and medical information queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+
+ - name: economics_decision
+ description: "Economics and financial topics"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: base-model
+ lora_name: social-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
+
+ - name: math_decision
+ description: "Mathematics and quantitative reasoning"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: base-model
+ lora_name: math-expert
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
+
+ - name: physics_decision
+ description: "Physics and physical sciences"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+
+ - name: computer_science_decision
+ description: "Computer science and programming"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
+
+ - name: philosophy_decision
+ description: "Philosophy and ethical questions"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
+ - model: base-model
+ lora_name: humanities-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
+
+ - name: engineering_decision
+ description: "Engineering and technical problem-solving"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
+ - model: base-model
+ lora_name: science-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
+
+ - name: thinking_decision
+ description: "Complex reasoning and multi-step thinking"
+ priority: 20
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "keyword"
+ rule_name: "thinking"
+ modelRefs:
+ - model: base-model
+ lora_name: general-expert
+ use_reasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
+ mode: "replace"
+
+ - name: general_decision
+ description: "General knowledge and miscellaneous topics"
+ priority: 1
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: base-model
+ lora_name: general-expert
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.75
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
+ mode: "replace"
+
+ # Strategy for selecting between multiple matching decisions
+ # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
+ strategy: "priority"
+
+ default_model: general-expert
+
+ bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+
+ semantic_cache:
+ enabled: true
+ backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
+ similarity_threshold: 0.8
+ max_entries: 1000 # Only applies to memory backend
+ ttl_seconds: 3600
+ eviction_policy: "fifo"
+ # HNSW index configuration (for memory backend only)
+ use_hnsw: true # Enable HNSW index for faster similarity search
+ hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory)
+ hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build)
+
+ # Hybrid cache configuration (when backend_type: "hybrid")
+ # Combines in-memory HNSW for fast search with Milvus for scalable storage
+ # max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000)
+ # backend_config_path: "config/milvus.yaml" # Path to Milvus config
+
+ # Embedding model for semantic similarity matching
+ # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
+ # Default: "bert" (fastest, lowest memory)
+ embedding_model: "bert"
+
+ tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: "config/tools_db.json"
+ fallback_to_empty: true
+
+ prompt_guard:
+ enabled: true # Global default - can be overridden per category with jailbreak_enabled
+ use_modernbert: true
+ model_id: "models/jailbreak_classifier_modernbert-base_model"
+ threshold: 0.7
+ use_cpu: true
+ jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
+
+ # Classifier configuration
+ classifier:
+ category_model:
+ model_id: "models/category_classifier_modernbert-base_model"
+ use_modernbert: true
+ threshold: 0.6
+ use_cpu: true
+ category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
+ pii_model:
+ model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
+ use_modernbert: true
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+
+ keyword_rules:
+ - category: "thinking"
+ operator: "OR"
+ keywords: ["urgent", "immediate", "asap", "think", "careful"]
+ case_sensitive: false
+
+
+ # Router Configuration for Dual-Path Selection
+ router:
+ # High confidence threshold for automatic LoRA selection
+ high_confidence_threshold: 0.99
+ # Low latency threshold in milliseconds for LoRA path selection
+ low_latency_threshold_ms: 2000
+ # Baseline scores for path evaluation
+ lora_baseline_score: 0.8
+ traditional_baseline_score: 0.7
+ embedding_baseline_score: 0.75
+ # Success rate calculation threshold
+ success_confidence_threshold: 0.8
+ # Large batch size threshold for parallel processing
+ large_batch_threshold: 4
+ # Default performance metrics (milliseconds)
+ lora_default_execution_time_ms: 1345
+ traditional_default_execution_time_ms: 4567
+ # Default processing requirements
+ default_confidence_threshold: 0.95
+ default_max_latency_ms: 5000
+ default_batch_size: 4
+ default_avg_execution_time_ms: 3000
+ # Default confidence and success rates
+ lora_default_confidence: 0.99
+ traditional_default_confidence: 0.95
+ lora_default_success_rate: 0.98
+ traditional_default_success_rate: 0.95
+ # Scoring weights for intelligent path selection (balanced approach)
+ multi_task_lora_weight: 0.30 # LoRA advantage for multi-task processing
+ single_task_traditional_weight: 0.30 # Traditional advantage for single tasks
+ large_batch_lora_weight: 0.25 # LoRA advantage for large batches (≥4)
+ small_batch_traditional_weight: 0.25 # Traditional advantage for single items
+ medium_batch_weight: 0.10 # Neutral weight for medium batches (2-3)
+ high_confidence_lora_weight: 0.25 # LoRA advantage for high confidence (≥0.99)
+ low_confidence_traditional_weight: 0.25 # Traditional for lower confidence (≤0.9)
+ low_latency_lora_weight: 0.30 # LoRA advantage for low latency (≤2000ms)
+ high_latency_traditional_weight: 0.10 # Traditional acceptable for relaxed timing
+ performance_history_weight: 0.20 # Historical performance comparison factor
+ # Traditional model specific configurations
+ traditional_bert_confidence_threshold: 0.95 # Traditional BERT confidence threshold
+ traditional_modernbert_confidence_threshold: 0.8 # Traditional ModernBERT confidence threshold
+ traditional_pii_detection_threshold: 0.5 # Traditional PII detection confidence threshold
+ traditional_token_classification_threshold: 0.9 # Traditional token classification threshold
+ traditional_dropout_prob: 0.1 # Traditional model dropout probability
+ traditional_attention_dropout_prob: 0.1 # Traditional model attention dropout probability
+ tie_break_confidence: 0.5 # Confidence value for tie-breaking situations
+
+ # Reasoning family configurations
+ reasoning_families:
+ deepseek:
+ type: "chat_template_kwargs"
+ parameter: "thinking"
+
+ qwen3:
+ type: "chat_template_kwargs"
+ parameter: "enable_thinking"
+
+ gpt-oss:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+ gpt:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+
+ # Global default reasoning effort level
+ default_reasoning_effort: high
+
+ # API Configuration
+ api:
+ batch_classification:
+ max_batch_size: 100
+ concurrency_threshold: 5
+ max_concurrency: 8
+ metrics:
+ enabled: true
+ detailed_goroutine_tracking: true
+ high_resolution_timing: false
+ sample_rate: 1.0
+ duration_buckets:
+ [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+ size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+
+ # Embedding Models Configuration (Optional)
+ # These models provide intelligent embedding generation with automatic routing:
+ # - Qwen3-Embedding-0.6B: Up to 32K context, high quality, 1024-dim embeddings
+ # - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128)
+ embedding_models:
+ qwen3_model_path: "models/Qwen3-Embedding-0.6B"
+ use_cpu: true # Set to false for GPU acceleration (requires CUDA)
+
+ # Observability Configuration
+ observability:
+ tracing:
+ enabled: false # Enable distributed tracing for docker-compose stack
+ provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
+ exporter:
+ type: "otlp" # Export spans to Jaeger (via OTLP gRPC)
+ endpoint: "jaeger:4317" # Jaeger collector inside compose network
+ insecure: true # Use insecure connection (no TLS)
+ sampling:
+ type: "always_on" # Sampling: always_on, always_off, probabilistic
+ rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
+ resource:
+ service_name: "vllm-semantic-router"
+ service_version: "v0.1.0"
+ deployment_environment: "development"
+
diff --git a/e2e/profiles/dynamic-config/crds/intelligentpool.yaml b/e2e/profiles/dynamic-config/crds/intelligentpool.yaml
new file mode 100644
index 000000000..8f20f7de5
--- /dev/null
+++ b/e2e/profiles/dynamic-config/crds/intelligentpool.yaml
@@ -0,0 +1,24 @@
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: ai-gateway-pool
+ namespace: default
+spec:
+ defaultModel: "general-expert"
+ models:
+ - name: "base-model"
+ reasoningFamily: "qwen3"
+ loras:
+ - name: "science-expert"
+ description: "Specialized for science domains: biology, chemistry, physics, health, engineering"
+ - name: "social-expert"
+ description: "Optimized for social sciences: business, economics"
+ - name: "math-expert"
+ description: "Fine-tuned for mathematics and quantitative reasoning"
+ - name: "law-expert"
+ description: "Specialized for legal questions and law-related topics"
+ - name: "humanities-expert"
+ description: "Optimized for humanities: psychology, history, philosophy"
+ - name: "general-expert"
+ description: "General-purpose adapter for diverse topics"
+
diff --git a/e2e/profiles/dynamic-config/crds/intelligentroute.yaml b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml
new file mode 100644
index 000000000..b04ac926d
--- /dev/null
+++ b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml
@@ -0,0 +1,346 @@
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: ai-gateway-route
+ namespace: default
+spec:
+ signals:
+ domains:
+ - name: "business"
+ description: "Business and management related queries"
+ - name: "law"
+ description: "Legal questions and law-related topics"
+ - name: "psychology"
+ description: "Psychology and mental health topics"
+ - name: "biology"
+ description: "Biology and life sciences questions"
+ - name: "chemistry"
+ description: "Chemistry and chemical sciences questions"
+ - name: "history"
+ description: "Historical questions and cultural topics"
+ - name: "health"
+ description: "Health and medical information queries"
+ - name: "economics"
+ description: "Economics and financial topics"
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+ - name: "physics"
+ description: "Physics and physical sciences"
+ - name: "computer science"
+ description: "Computer science and programming"
+ - name: "philosophy"
+ description: "Philosophy and ethical questions"
+ - name: "engineering"
+ description: "Engineering and technical problem-solving"
+ - name: "other"
+ description: "General knowledge and miscellaneous topics"
+
+ decisions:
+ - name: "business_decision"
+ priority: 10
+ description: "Business and management related queries"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "base-model"
+ loraName: "social-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
+ mode: "replace"
+
+ - name: "law_decision"
+ priority: 10
+ description: "Legal questions and law-related topics"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "base-model"
+ loraName: "law-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
+ mode: "replace"
+
+ - name: "psychology_decision"
+ priority: 10
+ description: "Psychology and mental health topics"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
+ - model: "base-model"
+ loraName: "humanities-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
+ mode: "replace"
+
+ - name: "biology_decision"
+ priority: 10
+ description: "Biology and life sciences questions"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "biology"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
+ mode: "replace"
+
+ - name: "chemistry_decision"
+ priority: 10
+ description: "Chemistry and chemical sciences questions"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
+ mode: "replace"
+
+ - name: "history_decision"
+ priority: 10
+ description: "Historical questions and cultural topics"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "history"
+ modelRefs:
+ - model: "base-model"
+ loraName: "humanities-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
+ mode: "replace"
+
+ - name: "health_decision"
+ priority: 10
+ description: "Health and medical information queries"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
+ mode: "replace"
+
+ - name: "economics_decision"
+ priority: 10
+ description: "Economics and financial topics"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "base-model"
+ loraName: "social-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
+ mode: "replace"
+
+ - name: "math_decision"
+ priority: 10
+ description: "Mathematics and quantitative reasoning"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "base-model"
+ loraName: "math-expert"
+ useReasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
+ mode: "replace"
+
+ - name: "physics_decision"
+ priority: 10
+ description: "Physics and physical sciences"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: true
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
+ mode: "replace"
+
+ - name: "computer_science_decision"
+ priority: 10
+ description: "Computer science and programming"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
+ mode: "replace"
+
+ - name: "philosophy_decision"
+ priority: 10
+ description: "Philosophy and ethical questions"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "philosophy"
+ modelRefs:
+ - model: "base-model"
+ loraName: "humanities-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
+ mode: "replace"
+
+ - name: "engineering_decision"
+ priority: 10
+ description: "Engineering and technical problem-solving"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: "base-model"
+ loraName: "science-expert"
+ useReasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ pii_types_allowed: []
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+ mode: "replace"
diff --git a/e2e/profiles/dynamic-config/profile.go b/e2e/profiles/dynamic-config/profile.go
new file mode 100644
index 000000000..50bcffb1c
--- /dev/null
+++ b/e2e/profiles/dynamic-config/profile.go
@@ -0,0 +1,326 @@
+package dynamicconfig
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "os/exec"
+ "time"
+
+ "github.com/vllm-project/semantic-router/e2e/pkg/framework"
+ "github.com/vllm-project/semantic-router/e2e/pkg/helm"
+ "github.com/vllm-project/semantic-router/e2e/pkg/helpers"
+ "k8s.io/client-go/kubernetes"
+ "k8s.io/client-go/tools/clientcmd"
+
+ // Import testcases package to register all test cases via their init() functions
+ _ "github.com/vllm-project/semantic-router/e2e/testcases"
+)
+
+// Profile implements the Dynamic Config test profile
+// This profile tests Kubernetes CRD-based dynamic configuration
+type Profile struct {
+ verbose bool
+}
+
+// NewProfile creates a new Dynamic Config profile
+func NewProfile() *Profile {
+ return &Profile{}
+}
+
+// Name returns the profile name
+func (p *Profile) Name() string {
+ return "dynamic-config"
+}
+
+// Description returns a description of what this profile tests
+func (p *Profile) Description() string {
+ return "Tests Kubernetes CRD-based dynamic configuration with IntelligentPool and IntelligentRoute"
+}
+
+// Setup deploys all required components for Dynamic Config testing
+func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error {
+ p.verbose = opts.Verbose
+ p.log("Setting up Dynamic Config test environment")
+
+ deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
+
+ // Step 1: Deploy Semantic Router with Kubernetes config source
+ p.log("Step 1/6: Deploying Semantic Router with Kubernetes config source")
+ if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil {
+ return fmt.Errorf("failed to deploy semantic router: %w", err)
+ }
+
+ // Step 2: Deploy Envoy Gateway
+ p.log("Step 2/6: Deploying Envoy Gateway")
+ if err := p.deployEnvoyGateway(ctx, deployer, opts); err != nil {
+ return fmt.Errorf("failed to deploy envoy gateway: %w", err)
+ }
+
+ // Step 3: Deploy Envoy AI Gateway
+ p.log("Step 3/6: Deploying Envoy AI Gateway")
+ if err := p.deployEnvoyAIGateway(ctx, deployer, opts); err != nil {
+ return fmt.Errorf("failed to deploy envoy ai gateway: %w", err)
+ }
+
+ // Step 4: Deploy Demo LLM and Gateway API Resources
+ p.log("Step 4/6: Deploying Demo LLM and Gateway API Resources")
+ if err := p.deployGatewayResources(ctx, opts); err != nil {
+ return fmt.Errorf("failed to deploy gateway resources: %w", err)
+ }
+
+ // Step 5: Deploy CRDs (IntelligentPool and IntelligentRoute)
+ p.log("Step 5/6: Deploying IntelligentPool and IntelligentRoute CRDs")
+ if err := p.deployCRDs(ctx, opts); err != nil {
+ return fmt.Errorf("failed to deploy CRDs: %w", err)
+ }
+
+ // Step 6: Verify all components are ready
+ p.log("Step 6/6: Verifying all components are ready")
+ if err := p.verifyEnvironment(ctx, opts); err != nil {
+ return fmt.Errorf("failed to verify environment: %w", err)
+ }
+
+ p.log("Dynamic Config test environment setup complete")
+ return nil
+}
+
+// Teardown cleans up resources created during setup
+func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
+ p.verbose = opts.Verbose
+ p.log("Tearing down Dynamic Config test environment")
+
+ deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
+
+ // Uninstall in reverse order
+ _ = deployer.Uninstall(ctx, "envoy-ai-gateway", "envoy-ai-gateway-system")
+ _ = deployer.Uninstall(ctx, "eg", "envoy-gateway-system")
+ _ = deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system")
+
+ p.log("Dynamic Config test environment teardown complete")
+ return nil
+}
+
+// GetTestCases returns the list of test cases for this profile
+func (p *Profile) GetTestCases() []string {
+ return []string{
+ "chat-completions-request",
+ "chat-completions-stress-request",
+ "domain-classify",
+ "semantic-cache",
+ "pii-detection",
+ "jailbreak-detection",
+ "chat-completions-progressive-stress",
+ }
+}
+
+// GetServiceConfig returns the service configuration for accessing the deployed service
+func (p *Profile) GetServiceConfig() framework.ServiceConfig {
+ return framework.ServiceConfig{
+ LabelSelector: "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router",
+ Namespace: "envoy-gateway-system",
+ PortMapping: "8080:80",
+ }
+}
+
+func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error {
+ // Use local Helm chart with dynamic config values
+ chartPath := "deploy/helm/semantic-router"
+ valuesFile := "e2e/profiles/dynamic-config/values.yaml"
+
+ // Override image to use locally built image
+ imageRepo := "ghcr.io/vllm-project/semantic-router/extproc"
+ imageTag := opts.ImageTag
+
+ installOpts := helm.InstallOptions{
+ ReleaseName: "semantic-router",
+ Chart: chartPath,
+ Namespace: "vllm-semantic-router-system",
+ ValuesFiles: []string{valuesFile},
+ Set: map[string]string{
+ "image.repository": imageRepo,
+ "image.tag": imageTag,
+ "image.pullPolicy": "Never", // Use local image, don't pull from registry
+ },
+ Wait: true,
+ Timeout: "30m",
+ }
+
+ if err := deployer.Install(ctx, installOpts); err != nil {
+ return err
+ }
+
+ return deployer.WaitForDeployment(ctx, "vllm-semantic-router-system", "semantic-router", 10*time.Minute)
+}
+
+func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer, _ *framework.SetupOptions) error {
+ installOpts := helm.InstallOptions{
+ ReleaseName: "eg",
+ Chart: "oci://docker.io/envoyproxy/gateway-helm",
+ Namespace: "envoy-gateway-system",
+ Version: "v0.0.0-latest",
+ ValuesFiles: []string{"https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml"},
+ Wait: true,
+ Timeout: "10m",
+ }
+
+ if err := deployer.Install(ctx, installOpts); err != nil {
+ return err
+ }
+
+ return deployer.WaitForDeployment(ctx, "envoy-gateway-system", "envoy-gateway", 10*time.Minute)
+}
+
+func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deployer, _ *framework.SetupOptions) error {
+ // Install AI Gateway CRDs
+ crdOpts := helm.InstallOptions{
+ ReleaseName: "aieg-crd",
+ Chart: "oci://docker.io/envoyproxy/ai-gateway-crds-helm",
+ Namespace: "envoy-ai-gateway-system",
+ Version: "v0.0.0-latest",
+ Wait: true,
+ Timeout: "10m",
+ }
+
+ if err := deployer.Install(ctx, crdOpts); err != nil {
+ return err
+ }
+
+ installOpts := helm.InstallOptions{
+ ReleaseName: "envoy-ai-gateway",
+ Chart: "oci://docker.io/envoyproxy/ai-gateway-helm",
+ Namespace: "envoy-ai-gateway-system",
+ Version: "v0.0.0-latest",
+ Wait: true,
+ Timeout: "10m",
+ }
+
+ if err := deployer.Install(ctx, installOpts); err != nil {
+ return err
+ }
+
+ return deployer.WaitForDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller", 10*time.Minute)
+}
+
+func (p *Profile) deployGatewayResources(ctx context.Context, opts *framework.SetupOptions) error {
+ // Apply base model
+ if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml"); err != nil {
+ return fmt.Errorf("failed to apply base model: %w", err)
+ }
+
+ // Apply gateway API resources
+ if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml"); err != nil {
+ return fmt.Errorf("failed to apply gateway API resources: %w", err)
+ }
+
+ return nil
+}
+
+func (p *Profile) deployCRDs(ctx context.Context, opts *framework.SetupOptions) error {
+ // Apply IntelligentPool CRD
+ if err := p.kubectlApply(ctx, opts.KubeConfig, "e2e/profiles/dynamic-config/crds/intelligentpool.yaml"); err != nil {
+ return fmt.Errorf("failed to apply IntelligentPool CRD: %w", err)
+ }
+
+ // Apply IntelligentRoute CRD
+ if err := p.kubectlApply(ctx, opts.KubeConfig, "e2e/profiles/dynamic-config/crds/intelligentroute.yaml"); err != nil {
+ return fmt.Errorf("failed to apply IntelligentRoute CRD: %w", err)
+ }
+
+ // Wait a bit for CRDs to be processed
+ time.Sleep(5 * time.Second)
+
+ return nil
+}
+
+func (p *Profile) kubectlApply(ctx context.Context, kubeconfig, manifestPath string) error {
+ cmd := exec.CommandContext(ctx, "kubectl", "apply", "-f", manifestPath, "--kubeconfig", kubeconfig)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ return cmd.Run()
+}
+
+func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error {
+ // Create Kubernetes client
+ config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig)
+ if err != nil {
+ return fmt.Errorf("failed to build kubeconfig: %w", err)
+ }
+
+ client, err := kubernetes.NewForConfig(config)
+ if err != nil {
+ return fmt.Errorf("failed to create kube client: %w", err)
+ }
+
+ // Wait for Envoy Gateway service to be ready with retry
+ retryTimeout := 10 * time.Minute
+ retryInterval := 5 * time.Second
+ startTime := time.Now()
+
+ p.log("Waiting for Envoy Gateway service to be ready...")
+
+ // Label selector for the semantic-router gateway service
+ labelSelector := "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router"
+
+ var envoyService string
+ for {
+ // Try to get Envoy service name
+ envoyService, err = helpers.GetEnvoyServiceName(ctx, client, labelSelector, p.verbose)
+ if err == nil {
+ // Verify that the service has exactly 1 pod running with all containers ready
+ podErr := helpers.VerifyServicePodsRunning(ctx, client, "envoy-gateway-system", envoyService, p.verbose)
+ if podErr == nil {
+ p.log("Envoy Gateway service is ready: %s", envoyService)
+ break
+ }
+ if p.verbose {
+ p.log("Envoy service found but pods not ready: %v", podErr)
+ }
+ err = fmt.Errorf("service pods not ready: %w", podErr)
+ }
+
+ if time.Since(startTime) >= retryTimeout {
+ return fmt.Errorf("failed to get Envoy service with running pods after %v: %w", retryTimeout, err)
+ }
+
+ if p.verbose {
+ p.log("Envoy service not ready, retrying in %v... (elapsed: %v)",
+ retryInterval, time.Since(startTime).Round(time.Second))
+ }
+
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-time.After(retryInterval):
+ // Continue retry
+ }
+ }
+
+ // Check all deployments are healthy
+ p.log("Verifying all deployments are healthy...")
+
+ // Check semantic-router deployment
+ if err := helpers.CheckDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router", p.verbose); err != nil {
+ return fmt.Errorf("semantic-router deployment not healthy: %w", err)
+ }
+
+ // Check envoy-gateway deployment
+ if err := helpers.CheckDeployment(ctx, client, "envoy-gateway-system", "envoy-gateway", p.verbose); err != nil {
+ return fmt.Errorf("envoy-gateway deployment not healthy: %w", err)
+ }
+
+ // Check ai-gateway-controller deployment
+ if err := helpers.CheckDeployment(ctx, client, "envoy-ai-gateway-system", "ai-gateway-controller", p.verbose); err != nil {
+ return fmt.Errorf("ai-gateway-controller deployment not healthy: %w", err)
+ }
+
+ p.log("All deployments are healthy")
+
+ return nil
+}
+
+func (p *Profile) log(format string, args ...interface{}) {
+ if p.verbose {
+ fmt.Printf("[dynamic-config] "+format+"\n", args...)
+ }
+}
diff --git a/e2e/profiles/dynamic-config/values.yaml b/e2e/profiles/dynamic-config/values.yaml
new file mode 100644
index 000000000..af8d9ee71
--- /dev/null
+++ b/e2e/profiles/dynamic-config/values.yaml
@@ -0,0 +1,143 @@
+# Semantic Router Configuration for Dynamic Config E2E Testing
+# This configuration uses Kubernetes CRDs for dynamic configuration
+# Static parts are defined here, dynamic parts (model_config, decisions, categories) come from CRDs
+
+config:
+ # Set config source to kubernetes to enable CRD-based configuration
+ config_source: kubernetes
+
+ # Static configuration - these are not managed by CRDs
+
+ bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+
+ semantic_cache:
+ enabled: true
+ backend_type: "memory"
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: "fifo"
+ use_hnsw: true
+ hnsw_m: 16
+ hnsw_ef_construction: 200
+ embedding_model: "bert"
+
+ tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: "config/tools_db.json"
+ fallback_to_empty: true
+
+ prompt_guard:
+ enabled: true
+ use_modernbert: true
+ model_id: "models/jailbreak_classifier_modernbert-base_model"
+ threshold: 0.7
+ use_cpu: true
+ jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
+
+ classifier:
+ category_model:
+ model_id: "models/category_classifier_modernbert-base_model"
+ use_modernbert: true
+ threshold: 0.6
+ use_cpu: true
+ category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
+ pii_model:
+ model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
+ use_modernbert: true
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+
+ router:
+ high_confidence_threshold: 0.99
+ low_latency_threshold_ms: 2000
+ lora_baseline_score: 0.8
+ traditional_baseline_score: 0.7
+ embedding_baseline_score: 0.75
+ success_confidence_threshold: 0.8
+ large_batch_threshold: 4
+ lora_default_execution_time_ms: 1345
+ traditional_default_execution_time_ms: 4567
+ default_confidence_threshold: 0.95
+ default_max_latency_ms: 5000
+ default_batch_size: 4
+ default_avg_execution_time_ms: 3000
+ lora_default_confidence: 0.99
+ traditional_default_confidence: 0.95
+ lora_default_success_rate: 0.98
+ traditional_default_success_rate: 0.95
+ multi_task_lora_weight: 0.30
+ single_task_traditional_weight: 0.30
+ large_batch_lora_weight: 0.25
+ small_batch_traditional_weight: 0.25
+ medium_batch_weight: 0.10
+ high_confidence_lora_weight: 0.25
+ low_confidence_traditional_weight: 0.25
+ low_latency_lora_weight: 0.30
+ high_latency_traditional_weight: 0.10
+ performance_history_weight: 0.20
+ traditional_bert_confidence_threshold: 0.95
+ traditional_modernbert_confidence_threshold: 0.8
+ traditional_pii_detection_threshold: 0.5
+ traditional_token_classification_threshold: 0.9
+ traditional_dropout_prob: 0.1
+ traditional_attention_dropout_prob: 0.1
+ tie_break_confidence: 0.5
+
+ reasoning_families:
+ deepseek:
+ type: "chat_template_kwargs"
+ parameter: "thinking"
+ qwen3:
+ type: "chat_template_kwargs"
+ parameter: "enable_thinking"
+ gpt-oss:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+ gpt:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+
+ default_reasoning_effort: high
+
+ api:
+ batch_classification:
+ max_batch_size: 100
+ concurrency_threshold: 5
+ max_concurrency: 8
+ metrics:
+ enabled: true
+ detailed_goroutine_tracking: true
+ high_resolution_timing: false
+ sample_rate: 1.0
+ duration_buckets:
+ [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+ size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+
+ embedding_models:
+ qwen3_model_path: "models/Qwen3-Embedding-0.6B"
+ gemma_model_path: "models/embeddinggemma-300m"
+ use_cpu: true
+
+ observability:
+ tracing:
+ enabled: false
+ provider: "opentelemetry"
+ exporter:
+ type: "otlp"
+ endpoint: "jaeger:4317"
+ insecure: true
+ sampling:
+ type: "always_on"
+ rate: 1.0
+ resource:
+ service_name: "vllm-semantic-router"
+ service_version: "v0.1.0"
+ deployment_environment: "development"
+
diff --git a/examples/semanticroute/README.md b/examples/semanticroute/README.md
deleted file mode 100644
index d232b3c19..000000000
--- a/examples/semanticroute/README.md
+++ /dev/null
@@ -1,179 +0,0 @@
-# SemanticRoute Examples
-
-This directory contains various examples of SemanticRoute configurations demonstrating different routing scenarios and capabilities.
-
-## Examples Overview
-
-### 1. Simple Intent Routing (`simple-intent-routing.yaml`)
-
-A basic example showing intent-based routing for math and computer science queries.
-
-**Features:**
-
-- Simple intent matching with categories
-- Single model reference with fallback
-- Minimal configuration
-
-**Use Case:** Basic routing based on query categories without complex filtering.
-
-### 2. Complex Filter Chain (`complex-filter-chain.yaml`)
-
-Demonstrates a comprehensive filter chain with multiple security and performance filters.
-
-**Features:**
-
-- PII detection with custom allowed types
-- Prompt guard with custom security rules
-- Semantic caching for performance
-- Reasoning control configuration
-
-**Use Case:** Production environments requiring security, privacy, and performance optimizations.
-
-### 3. Multiple Routes (`multiple-routes.yaml`)
-
-Shows how to define multiple routing rules within a single SemanticRoute resource.
-
-**Features:**
-
-- Separate rules for technical vs. creative queries
-- Different reasoning configurations per rule
-- Rule-specific caching strategies
-
-**Use Case:** Applications serving diverse query types with different processing requirements.
-
-### 4. Weighted Routing (`weighted-routing.yaml`)
-
-Demonstrates traffic distribution across multiple model endpoints using weights and priorities.
-
-**Features:**
-
-- Traffic splitting (80/20) between models
-- Priority-based failover
-- Load balancing configuration
-
-**Use Case:** A/B testing, gradual rollouts, or load distribution across model endpoints.
-
-### 5. Tool Selection Example (`tool-selection-example.yaml`)
-
-Demonstrates automatic tool selection based on semantic similarity to user queries.
-
-**Features:**
-
-- Automatic tool selection with configurable similarity threshold
-- Tool filtering by categories and tags
-- Fallback behavior configuration
-- Integration with semantic caching and reasoning control
-
-**Use Case:** Applications requiring dynamic tool selection based on user intent and query content.
-
-### 6. Comprehensive Example (`comprehensive-example.yaml`)
-
-A production-ready configuration showcasing all SemanticRoute features.
-
-**Features:**
-
-- Multiple rules with different configurations
-- Advanced filtering with custom rules
-- External cache backend (Redis)
-- High-availability model setup
-- Comprehensive security policies
-
-**Use Case:** Enterprise production deployments requiring full feature utilization.
-
-## Deployment Instructions
-
-### Prerequisites
-
-1. Kubernetes cluster with SemanticRoute CRD installed:
-
- ```bash
- kubectl apply -f ../../deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml
- ```
-
-2. Ensure your model endpoints are accessible from the cluster.
-
-### Deploy Examples
-
-1. **Deploy a single example:**
-
- ```bash
- kubectl apply -f simple-intent-routing.yaml
- ```
-
-2. **Deploy all examples:**
-
- ```bash
- kubectl apply -f .
- ```
-
-3. **Verify deployment:**
-
- ```bash
- kubectl get semanticroutes
- kubectl describe semanticroute reasoning-route
- ```
-
-## Configuration Reference
-
-### Intent Configuration
-
-```yaml
-intents:
-- category: "math" # Required: Intent category name
- description: "Mathematics queries" # Optional: Human-readable description
- threshold: 0.7 # Optional: Confidence threshold (0.0-1.0)
-```
-
-### Model Reference Configuration
-
-```yaml
-modelRefs:
-- modelName: "gpt-oss" # Required: Model identifier
- address: "127.0.0.1" # Required: Endpoint address
- port: 8080 # Required: Endpoint port
- weight: 80 # Optional: Traffic weight (0-100)
- priority: 100 # Optional: Priority for failover
-```
-
-### Filter Configuration
-
-Each filter type has specific configuration options:
-
-- **PIIDetection**: Controls PII detection and handling
-- **PromptGuard**: Provides security and jailbreak protection
-- **SemanticCache**: Enables response caching for performance
-- **ReasoningControl**: Manages reasoning mode behavior
-- **ToolSelection**: Enables automatic tool selection based on semantic similarity
-
-## Best Practices
-
-1. **Start Simple**: Begin with basic intent routing and add filters as needed.
-
-2. **Test Thoroughly**: Validate routing behavior with representative queries.
-
-3. **Monitor Performance**: Use appropriate cache settings and monitor hit rates.
-
-4. **Security First**: Enable PII detection and prompt guard in production.
-
-5. **Gradual Rollout**: Use weighted routing for safe model deployments.
-
-## Troubleshooting
-
-### Common Issues
-
-1. **Route Not Matching**: Check intent categories and thresholds.
-2. **Model Unreachable**: Verify endpoint addresses and network connectivity.
-3. **Filter Errors**: Validate filter configurations against the schema.
-
-### Debugging Commands
-
-```bash
-# Check SemanticRoute status
-kubectl get sr -o wide
-
-# View detailed configuration
-kubectl describe semanticroute
-
-# Check logs (if controller is deployed)
-kubectl logs -l app=semantic-router-controller
-```
diff --git a/examples/semanticroute/complex-filter-chain.yaml b/examples/semanticroute/complex-filter-chain.yaml
deleted file mode 100644
index 6eabacfce..000000000
--- a/examples/semanticroute/complex-filter-chain.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: complex-route
- namespace: default
- labels:
- app: semantic-router
- scenario: complex-filter-chain
-spec:
- rules:
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, data structures"
- threshold: 0.7
- - category: "math"
- description: "Mathematics, calculus, algebra"
- threshold: 0.7
- modelRefs:
- - modelName: gpt-oss
- address: 127.0.0.1
- port: 8080
- weight: 100
- filters:
- - type: PIIDetection
- enabled: true
- config:
- allowByDefault: false
- pii_types_allowed: ["EMAIL_ADDRESS", "PERSON"]
- threshold: 0.7
- action: "block"
- - type: PromptGuard
- enabled: true
- config:
- threshold: 0.7
- action: "block"
- customRules:
- - name: "sensitive-data-rule"
- pattern: "(?i)(password|secret|token|key)"
- action: "block"
- description: "Block requests containing sensitive data keywords"
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.8
- maxEntries: 1000
- ttlSeconds: 3600
- backend: "memory"
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: true
- reasoningEffort: "medium"
- maxReasoningSteps: 10
- reasoningTimeout: 30
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
diff --git a/examples/semanticroute/comprehensive-example.yaml b/examples/semanticroute/comprehensive-example.yaml
deleted file mode 100644
index aa3c39093..000000000
--- a/examples/semanticroute/comprehensive-example.yaml
+++ /dev/null
@@ -1,109 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: comprehensive-example
- namespace: default
- labels:
- app: semantic-router
- scenario: comprehensive
- environment: production
-spec:
- rules:
- # Rule 1: High-performance reasoning route for technical queries
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, software engineering"
- threshold: 0.75
- - category: "math"
- description: "Advanced mathematics, calculus, statistics"
- threshold: 0.75
- modelRefs:
- - modelName: gpt-oss-premium
- address: 127.0.0.1
- port: 8080
- weight: 70
- priority: 100
- - modelName: claude-reasoning
- address: 127.0.0.1
- port: 8082
- weight: 30
- priority: 95
- filters:
- - type: PIIDetection
- enabled: true
- config:
- allowByDefault: false
- pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE"]
- threshold: 0.8
- action: "block"
- - type: PromptGuard
- enabled: true
- config:
- threshold: 0.75
- action: "block"
- customRules:
- - name: "code-injection-rule"
- pattern: "(?i)(eval|exec|system|shell|cmd)"
- action: "warn"
- description: "Detect potential code injection attempts"
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.85
- maxEntries: 2000
- ttlSeconds: 7200
- backend: "redis"
- backendConfig:
- host: "redis.cache.svc.cluster.local"
- port: "6379"
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: true
- reasoningEffort: "high"
- maxReasoningSteps: 20
- reasoningTimeout: 60
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
-
- # Rule 2: Creative and general purpose route
- - intents:
- - category: "creative"
- description: "Creative writing, storytelling, art generation"
- threshold: 0.6
- - category: "other"
- description: "General purpose conversations"
- threshold: 0.5
- modelRefs:
- - modelName: creative-model
- address: 127.0.0.1
- port: 8081
- weight: 100
- filters:
- - type: PIIDetection
- enabled: true
- config:
- allowByDefault: true
- pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
- threshold: 0.7
- action: "mask"
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: false
- reasoningEffort: "low"
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.75
- maxEntries: 1000
- ttlSeconds: 3600
- backend: "memory"
- defaultModel:
- modelName: general-model
- address: 127.0.0.1
- port: 8089
diff --git a/examples/semanticroute/multiple-routes.yaml b/examples/semanticroute/multiple-routes.yaml
deleted file mode 100644
index e35ee9e39..000000000
--- a/examples/semanticroute/multiple-routes.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: multiple-routes
- namespace: default
- labels:
- app: semantic-router
- scenario: multiple-routes
-spec:
- rules:
- # Rule 1: Reasoning-enabled route for technical queries
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, data structures"
- threshold: 0.7
- - category: "math"
- description: "Mathematics, calculus, algebra"
- threshold: 0.7
- modelRefs:
- - modelName: gpt-oss
- address: 127.0.0.1
- port: 8080
- weight: 100
- filters:
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: true
- reasoningEffort: "high"
- maxReasoningSteps: 15
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.85
- maxEntries: 500
- ttlSeconds: 7200
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
-
- # Rule 2: Lightweight route for creative and general queries
- - intents:
- - category: "creative"
- description: "Creative writing, storytelling, art"
- threshold: 0.6
- - category: "other"
- description: "General purpose queries"
- threshold: 0.5
- modelRefs:
- - modelName: lightweight-model
- address: 127.0.0.1
- port: 8081
- weight: 100
- filters:
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: false
- reasoningEffort: "low"
- - type: PIIDetection
- enabled: true
- config:
- allowByDefault: true
- threshold: 0.8
- action: "mask"
- defaultModel:
- modelName: general-model
- address: 127.0.0.1
- port: 8089
diff --git a/examples/semanticroute/simple-intent-routing.yaml b/examples/semanticroute/simple-intent-routing.yaml
deleted file mode 100644
index 99abd5b06..000000000
--- a/examples/semanticroute/simple-intent-routing.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: reasoning-route
- namespace: default
- labels:
- app: semantic-router
- scenario: simple-intent
-spec:
- rules:
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, data structures, software engineering"
- threshold: 0.7
- - category: "math"
- description: "Mathematics, calculus, algebra, statistics"
- threshold: 0.7
- modelRefs:
- - modelName: gpt-oss
- address: 127.0.0.1
- port: 8080
- weight: 100
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
diff --git a/examples/semanticroute/tool-selection-example.yaml b/examples/semanticroute/tool-selection-example.yaml
deleted file mode 100644
index 7cd4c2195..000000000
--- a/examples/semanticroute/tool-selection-example.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: tool-selection-example
- namespace: default
- labels:
- app: semantic-router
- scenario: tool-selection
-spec:
- rules:
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, data structures"
- threshold: 0.7
- - category: "math"
- description: "Mathematics, calculus, algebra"
- threshold: 0.7
- modelRefs:
- - modelName: gpt-oss
- address: 127.0.0.1
- port: 8080
- weight: 100
- filters:
- - type: ToolSelection
- enabled: true
- config:
- topK: 3
- similarityThreshold: 0.8
- toolsDBPath: "config/tools_db.json"
- fallbackToEmpty: true
- categories: ["weather", "calculation", "search"]
- tags: ["utility", "api", "function"]
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.85
- maxEntries: 1000
- ttlSeconds: 3600
- backend: "memory"
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: true
- reasoningEffort: "medium"
- maxReasoningSteps: 10
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
diff --git a/examples/semanticroute/weighted-routing.yaml b/examples/semanticroute/weighted-routing.yaml
deleted file mode 100644
index 19f381cba..000000000
--- a/examples/semanticroute/weighted-routing.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-apiVersion: vllm.ai/v1alpha1
-kind: SemanticRoute
-metadata:
- name: weighted-routing
- namespace: default
- labels:
- app: semantic-router
- scenario: weighted-routing
-spec:
- rules:
- - intents:
- - category: "computer science"
- description: "Programming, algorithms, data structures"
- threshold: 0.7
- - category: "math"
- description: "Mathematics, calculus, algebra"
- threshold: 0.7
- modelRefs:
- # Primary model gets 80% of traffic
- - modelName: gpt-oss
- address: 127.0.0.1
- port: 8080
- weight: 80
- priority: 100
- # Secondary model gets 20% of traffic
- - modelName: qwen3
- address: 127.0.0.1
- port: 8089
- weight: 20
- priority: 90
- filters:
- - type: ReasoningControl
- enabled: true
- config:
- reasonFamily: "gpt-oss"
- enableReasoning: true
- reasoningEffort: "medium"
- maxReasoningSteps: 10
- - type: SemanticCache
- enabled: true
- config:
- similarityThreshold: 0.8
- maxEntries: 1000
- ttlSeconds: 3600
- backend: "memory"
- defaultModel:
- modelName: deepseek-v31
- address: 127.0.0.1
- port: 8088
diff --git a/src/semantic-router/cmd/main.go b/src/semantic-router/cmd/main.go
index 2b0569560..9a46583bf 100644
--- a/src/semantic-router/cmd/main.go
+++ b/src/semantic-router/cmd/main.go
@@ -16,6 +16,7 @@ import (
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/apiserver"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/k8s"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/tracing"
)
@@ -31,6 +32,8 @@ func main() {
enableSystemPromptAPI = flag.Bool("enable-system-prompt-api", false, "Enable system prompt configuration endpoints (SECURITY: only enable in trusted environments)")
secure = flag.Bool("secure", false, "Enable secure gRPC server with TLS")
certPath = flag.String("cert-path", "", "Path to TLS certificate directory (containing tls.crt and tls.key)")
+ kubeconfig = flag.String("kubeconfig", "", "Path to kubeconfig file (optional, uses in-cluster config if not specified)")
+ namespace = flag.String("namespace", "default", "Kubernetes namespace to watch for CRDs")
)
flag.Parse()
@@ -51,6 +54,10 @@ func main() {
logging.Fatalf("Failed to load config: %v", err)
}
+ // Set the initial configuration in the global config
+ // This is important for Kubernetes mode where the controller will update it
+ config.Replace(cfg)
+
// Initialize distributed tracing if enabled
ctx := context.Background()
if cfg.Observability.Tracing.Enabled {
@@ -114,10 +121,8 @@ func main() {
logging.Infof("Starting vLLM Semantic Router ExtProc with config: %s", *configPath)
// Initialize embedding models if configured (Long-context support)
- cfg, err = config.Load(*configPath)
- if err != nil {
- logging.Warnf("Failed to load config for embedding models: %v", err)
- } else if cfg.Qwen3ModelPath != "" || cfg.GemmaModelPath != "" {
+ // Use the already loaded config instead of calling config.Load() again
+ if cfg.Qwen3ModelPath != "" || cfg.GemmaModelPath != "" {
logging.Infof("Initializing embedding models...")
logging.Infof(" Qwen3 model: %s", cfg.Qwen3ModelPath)
logging.Infof(" Gemma model: %s", cfg.GemmaModelPath)
@@ -152,7 +157,43 @@ func main() {
}()
}
+ // Start Kubernetes controller if ConfigSource is kubernetes
+ if cfg.ConfigSource == config.ConfigSourceKubernetes {
+ logging.Infof("ConfigSource is kubernetes, starting Kubernetes controller")
+ go startKubernetesController(cfg, *kubeconfig, *namespace)
+ } else {
+ logging.Infof("ConfigSource is file (or not specified), using file-based configuration")
+ }
+
if err := server.Start(); err != nil {
logging.Fatalf("ExtProc server error: %v", err)
}
}
+
+// startKubernetesController starts the Kubernetes controller for watching CRDs
+func startKubernetesController(staticConfig *config.RouterConfig, kubeconfig, namespace string) {
+	// Create a CRD watcher whose OnConfigUpdate callback publishes each newly
+	// built configuration to the whole process via config.Replace. Start()
+	// blocks, so main launches this function on its own goroutine.
+
+ logging.Infof("Starting Kubernetes controller for namespace: %s", namespace)
+
+ controller, err := k8s.NewController(k8s.ControllerConfig{
+ Namespace: namespace,
+ Kubeconfig: kubeconfig,
+ StaticConfig: staticConfig,
+ OnConfigUpdate: func(newConfig *config.RouterConfig) error {
+ config.Replace(newConfig)
+ logging.Infof("Configuration updated from Kubernetes CRDs")
+ return nil
+ },
+ })
+ if err != nil {
+ logging.Fatalf("Failed to create Kubernetes controller: %v", err)
+ }
+
+ ctx := context.Background()
+ if err := controller.Start(ctx); err != nil {
+ logging.Fatalf("Kubernetes controller error: %v", err)
+ }
+}
diff --git a/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentpools.yaml b/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentpools.yaml
new file mode 100644
index 000000000..7ffbd0a24
--- /dev/null
+++ b/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentpools.yaml
@@ -0,0 +1,202 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentpools.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentPool
+ listKind: IntelligentPoolList
+ plural: intelligentpools
+ shortNames:
+ - ipool
+ singular: intelligentpool
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Default model name
+ jsonPath: .spec.defaultModel
+ name: Default Model
+ type: string
+ - description: Number of models
+ jsonPath: .status.modelCount
+ name: Models
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentPool defines a pool of models with their configurations
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentPoolSpec defines the desired state of IntelligentPool
+ properties:
+ defaultModel:
+ description: DefaultModel specifies the default model to use when
+ no specific model is selected
+ maxLength: 100
+ minLength: 1
+ type: string
+ models:
+ description: Models defines the list of available models in this pool
+ items:
+ description: ModelConfig defines the configuration for a single
+ model
+ properties:
+ loras:
+ description: LoRAs defines the list of LoRA adapters available
+ for this model
+ items:
+ description: LoRAConfig defines a LoRA adapter configuration
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this LoRA adapter
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this LoRA
+ adapter
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 50
+ type: array
+ name:
+ description: Name is the unique identifier for this model
+ maxLength: 100
+ minLength: 1
+ type: string
+ pricing:
+ description: Pricing defines the cost structure for this model
+ properties:
+ inputTokenPrice:
+ description: InputTokenPrice is the cost per input token
+ minimum: 0
+ type: number
+ outputTokenPrice:
+ description: OutputTokenPrice is the cost per output token
+ minimum: 0
+ type: number
+ type: object
+ reasoningFamily:
+ description: |-
+ ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
+ Must be defined in the global static configuration's ReasoningFamilies
+ maxLength: 50
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ required:
+ - defaultModel
+ - models
+ type: object
+ status:
+ description: IntelligentPoolStatus defines the observed state of IntelligentPool
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentPool's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ modelCount:
+ description: ModelCount indicates the number of models in the pool
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentPool
+ format: int64
+ type: integer
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentroutes.yaml b/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentroutes.yaml
new file mode 100644
index 000000000..487f41aff
--- /dev/null
+++ b/src/semantic-router/deploy/helm/semantic-router/templates/vllm.ai_intelligentroutes.yaml
@@ -0,0 +1,369 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentroutes.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentRoute
+ listKind: IntelligentRouteList
+ plural: intelligentroutes
+ shortNames:
+ - iroute
+ singular: intelligentroute
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Decision strategy
+ jsonPath: .spec.strategy
+ name: Strategy
+ type: string
+ - description: Number of decisions
+ jsonPath: .status.decisionCount
+ name: Decisions
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentRoute defines intelligent routing rules and decisions
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentRouteSpec defines the desired state of IntelligentRoute
+ properties:
+ decisions:
+ description: Decisions defines the routing decisions based on signal
+ combinations
+ items:
+ description: Decision defines a routing decision based on rule combinations
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this decision
+ maxLength: 500
+ type: string
+ modelRefs:
+ description: ModelRefs defines the model references for this
+ decision (currently only one model is supported)
+ items:
+ description: ModelRef defines a model reference without score
+ properties:
+ loraName:
+ description: LoRAName is the name of the LoRA adapter
+ to use (must exist in the model's LoRAs)
+ maxLength: 100
+ type: string
+ model:
+ description: Model is the name of the model (must exist
+ in IntelligentPool)
+ maxLength: 100
+ minLength: 1
+ type: string
+ reasoningDescription:
+ description: ReasoningDescription provides context for
+ when to use reasoning
+ maxLength: 500
+ type: string
+ reasoningEffort:
+ description: ReasoningEffort defines the reasoning effort
+ level (low/medium/high)
+ enum:
+ - low
+ - medium
+ - high
+ type: string
+ useReasoning:
+ default: false
+ description: UseReasoning specifies whether to enable
+ reasoning mode for this model
+ type: boolean
+ required:
+ - model
+ type: object
+ maxItems: 1
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this decision
+ maxLength: 100
+ minLength: 1
+ type: string
+ plugins:
+ description: Plugins defines the plugins to apply for this decision
+ items:
+ description: DecisionPlugin defines a plugin configuration
+ for a decision
+ properties:
+ configuration:
+ description: Configuration is the plugin-specific configuration
+ as a raw JSON object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: Type is the plugin type (semantic-cache,
+ jailbreak, pii, system_prompt, header_mutation)
+ enum:
+ - semantic-cache
+ - jailbreak
+ - pii
+ - system_prompt
+ - header_mutation
+ type: string
+ required:
+ - type
+ type: object
+ maxItems: 10
+ type: array
+ priority:
+ default: 0
+ description: |-
+ Priority defines the priority of this decision (higher values = higher priority)
+ Used when strategy is "priority"
+ format: int32
+ maximum: 1000
+ minimum: 0
+ type: integer
+ signals:
+ description: Signals defines the signal combination logic
+ properties:
+ conditions:
+ description: Conditions defines the list of signal conditions
+ items:
+ description: SignalCondition defines a single signal condition
+ properties:
+ name:
+ description: Name is the name of the signal to reference
+ maxLength: 100
+ minLength: 1
+ type: string
+ type:
+ description: Type defines the type of signal (keyword/embedding/domain)
+ enum:
+ - keyword
+ - embedding
+ - domain
+ type: string
+ required:
+ - name
+ - type
+ type: object
+ maxItems: 50
+ minItems: 1
+ type: array
+ operator:
+ description: Operator defines the logical operator for combining
+ conditions (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - conditions
+ - operator
+ type: object
+ required:
+ - modelRefs
+ - name
+ - signals
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ signals:
+ description: Signals defines signal extraction rules for routing decisions
+ properties:
+                  domains:
+                    description: Domains defines MMLU domain categories for classification
+                    items:
+                      {description: DomainSignal defines a domain category for classification, properties: {description: {description: Description provides a human-readable description of this domain, maxLength: 500, type: string}, name: {description: Name is the unique identifier for this domain, maxLength: 100, minLength: 1, type: string}}, required: [name], type: object}
+                    maxItems: 14
+                    type: array
+ embeddings:
+ description: Embeddings defines embedding-based signal extraction
+ rules
+ items:
+ description: EmbeddingSignal defines an embedding-based signal
+ extraction rule
+ properties:
+ aggregationMethod:
+ default: max
+ description: AggregationMethod defines how to aggregate
+ multiple candidate similarities
+ enum:
+ - mean
+ - max
+ - any
+ type: string
+ candidates:
+ description: Candidates is the list of candidate phrases
+ for semantic matching
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this signal
+ maxLength: 100
+ minLength: 1
+ type: string
+ threshold:
+ description: Threshold is the similarity threshold for matching
+ (0.0-1.0)
+ maximum: 1
+ minimum: 0
+ type: number
+ required:
+ - candidates
+ - name
+ - threshold
+ type: object
+ maxItems: 100
+ type: array
+ keywords:
+ description: Keywords defines keyword-based signal extraction
+ rules
+ items:
+ description: KeywordSignal defines a keyword-based signal extraction
+ rule
+ properties:
+ caseSensitive:
+ default: false
+ description: CaseSensitive specifies whether keyword matching
+ is case-sensitive
+ type: boolean
+ keywords:
+ description: Keywords is the list of keywords to match
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this rule
+ (also used as category name)
+ maxLength: 100
+ minLength: 1
+ type: string
+ operator:
+ description: Operator defines the logical operator for keywords
+ (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - keywords
+ - name
+ - operator
+ type: object
+ maxItems: 100
+ type: array
+ type: object
+ required:
+ - decisions
+ type: object
+ status:
+ description: IntelligentRouteStatus defines the observed state of IntelligentRoute
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentRoute's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ decisionCount:
+ description: DecisionCount indicates the number of decisions
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentRoute
+ format: int64
+ type: integer
+ referencedPool:
+ description: ReferencedPool indicates the name of the referenced IntelligentPool
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentpools.yaml b/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentpools.yaml
new file mode 100644
index 000000000..7ffbd0a24
--- /dev/null
+++ b/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentpools.yaml
@@ -0,0 +1,202 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentpools.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentPool
+ listKind: IntelligentPoolList
+ plural: intelligentpools
+ shortNames:
+ - ipool
+ singular: intelligentpool
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Default model name
+ jsonPath: .spec.defaultModel
+ name: Default Model
+ type: string
+ - description: Number of models
+ jsonPath: .status.modelCount
+ name: Models
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentPool defines a pool of models with their configurations
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentPoolSpec defines the desired state of IntelligentPool
+ properties:
+ defaultModel:
+ description: DefaultModel specifies the default model to use when
+ no specific model is selected
+ maxLength: 100
+ minLength: 1
+ type: string
+ models:
+ description: Models defines the list of available models in this pool
+ items:
+ description: ModelConfig defines the configuration for a single
+ model
+ properties:
+ loras:
+ description: LoRAs defines the list of LoRA adapters available
+ for this model
+ items:
+ description: LoRAConfig defines a LoRA adapter configuration
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this LoRA adapter
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this LoRA
+ adapter
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 50
+ type: array
+ name:
+ description: Name is the unique identifier for this model
+ maxLength: 100
+ minLength: 1
+ type: string
+ pricing:
+ description: Pricing defines the cost structure for this model
+ properties:
+ inputTokenPrice:
+ description: InputTokenPrice is the cost per input token
+ minimum: 0
+ type: number
+ outputTokenPrice:
+ description: OutputTokenPrice is the cost per output token
+ minimum: 0
+ type: number
+ type: object
+ reasoningFamily:
+ description: |-
+ ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
+ Must be defined in the global static configuration's ReasoningFamilies
+ maxLength: 50
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ required:
+ - defaultModel
+ - models
+ type: object
+ status:
+ description: IntelligentPoolStatus defines the observed state of IntelligentPool
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentPool's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ modelCount:
+ description: ModelCount indicates the number of models in the pool
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentPool
+ format: int64
+ type: integer
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentroutes.yaml b/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentroutes.yaml
new file mode 100644
index 000000000..3e971f4b3
--- /dev/null
+++ b/src/semantic-router/deploy/kubernetes/crds/deploy/vllm.ai_intelligentroutes.yaml
@@ -0,0 +1,383 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.19.0
+ name: intelligentroutes.vllm.ai
+spec:
+ group: vllm.ai
+ names:
+ kind: IntelligentRoute
+ listKind: IntelligentRouteList
+ plural: intelligentroutes
+ shortNames:
+ - iroute
+ singular: intelligentroute
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - description: Decision strategy
+ jsonPath: .spec.strategy
+ name: Strategy
+ type: string
+ - description: Number of decisions
+ jsonPath: .status.decisionCount
+ name: Decisions
+ type: integer
+ - description: Ready status
+ jsonPath: .status.conditions[?(@.type=='Ready')].status
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: IntelligentRoute defines intelligent routing rules and decisions
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: IntelligentRouteSpec defines the desired state of IntelligentRoute
+ properties:
+ decisions:
+ description: Decisions defines the routing decisions based on signal
+ combinations
+ items:
+ description: Decision defines a routing decision based on rule combinations
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this decision
+ maxLength: 500
+ type: string
+ modelRefs:
+ description: ModelRefs defines the model references for this
+ decision (currently only one model is supported)
+ items:
+ description: ModelRef defines a model reference without score
+ properties:
+ loraName:
+ description: LoRAName is the name of the LoRA adapter
+ to use (must exist in the model's LoRAs)
+ maxLength: 100
+ type: string
+ model:
+ description: Model is the name of the model (must exist
+ in IntelligentPool)
+ maxLength: 100
+ minLength: 1
+ type: string
+ reasoningDescription:
+ description: ReasoningDescription provides context for
+ when to use reasoning
+ maxLength: 500
+ type: string
+ reasoningEffort:
+ description: ReasoningEffort defines the reasoning effort
+ level (low/medium/high)
+ enum:
+ - low
+ - medium
+ - high
+ type: string
+ useReasoning:
+ default: false
+ description: UseReasoning specifies whether to enable
+ reasoning mode for this model
+ type: boolean
+ required:
+ - model
+ type: object
+ maxItems: 1
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this decision
+ maxLength: 100
+ minLength: 1
+ type: string
+ plugins:
+ description: Plugins defines the plugins to apply for this decision
+ items:
+ description: DecisionPlugin defines a plugin configuration
+ for a decision
+ properties:
+ configuration:
+ description: Configuration is the plugin-specific configuration
+ as a raw JSON object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: Type is the plugin type (semantic-cache,
+ jailbreak, pii, system_prompt, header_mutation)
+ enum:
+ - semantic-cache
+ - jailbreak
+ - pii
+ - system_prompt
+ - header_mutation
+ type: string
+ required:
+ - type
+ type: object
+ maxItems: 10
+ type: array
+ priority:
+ default: 0
+ description: |-
+ Priority defines the priority of this decision (higher values = higher priority)
+ Used when strategy is "priority"
+ format: int32
+ maximum: 1000
+ minimum: 0
+ type: integer
+ signals:
+ description: Signals defines the signal combination logic
+ properties:
+ conditions:
+ description: Conditions defines the list of signal conditions
+ items:
+ description: SignalCondition defines a single signal condition
+ properties:
+ name:
+ description: Name is the name of the signal to reference
+ maxLength: 100
+ minLength: 1
+ type: string
+ type:
+ description: Type defines the type of signal (keyword/embedding/domain)
+ enum:
+ - keyword
+ - embedding
+ - domain
+ type: string
+ required:
+ - name
+ - type
+ type: object
+ maxItems: 50
+ minItems: 1
+ type: array
+ operator:
+ description: Operator defines the logical operator for combining
+ conditions (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - conditions
+ - operator
+ type: object
+ required:
+ - modelRefs
+ - name
+ - signals
+ type: object
+ maxItems: 100
+ minItems: 1
+ type: array
+ signals:
+ description: Signals defines signal extraction rules for routing decisions
+ properties:
+ domains:
+ description: Domains defines MMLU domain categories for classification
+ items:
+ description: DomainSignal defines a domain category for classification
+ properties:
+ description:
+ description: Description provides a human-readable description
+ of this domain
+ maxLength: 500
+ type: string
+ name:
+ description: Name is the unique identifier for this domain
+ maxLength: 100
+ minLength: 1
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 14
+ type: array
+ embeddings:
+ description: Embeddings defines embedding-based signal extraction
+ rules
+ items:
+ description: EmbeddingSignal defines an embedding-based signal
+ extraction rule
+ properties:
+ aggregationMethod:
+ default: max
+ description: AggregationMethod defines how to aggregate
+ multiple candidate similarities
+ enum:
+ - mean
+ - max
+ - any
+ type: string
+ candidates:
+ description: Candidates is the list of candidate phrases
+ for semantic matching
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this signal
+ maxLength: 100
+ minLength: 1
+ type: string
+ threshold:
+ description: Threshold is the similarity threshold for matching
+ (0.0-1.0)
+ maximum: 1
+ minimum: 0
+ type: number
+ required:
+ - candidates
+ - name
+ - threshold
+ type: object
+ maxItems: 100
+ type: array
+ keywords:
+ description: Keywords defines keyword-based signal extraction
+ rules
+ items:
+ description: KeywordSignal defines a keyword-based signal extraction
+ rule
+ properties:
+ caseSensitive:
+ default: false
+ description: CaseSensitive specifies whether keyword matching
+ is case-sensitive
+ type: boolean
+ keywords:
+ description: Keywords is the list of keywords to match
+ items:
+ type: string
+ maxItems: 100
+ minItems: 1
+ type: array
+ name:
+ description: Name is the unique identifier for this rule
+ (also used as category name)
+ maxLength: 100
+ minLength: 1
+ type: string
+ operator:
+ description: Operator defines the logical operator for keywords
+ (AND/OR)
+ enum:
+ - AND
+ - OR
+ type: string
+ required:
+ - keywords
+ - name
+ - operator
+ type: object
+ maxItems: 100
+ type: array
+ type: object
+ required:
+ - decisions
+ type: object
+ status:
+ description: IntelligentRouteStatus defines the observed state of IntelligentRoute
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the IntelligentRoute's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ decisionCount:
+ description: DecisionCount indicates the number of decisions
+ format: int32
+ type: integer
+ observedGeneration:
+ description: ObservedGeneration reflects the generation of the most
+ recently observed IntelligentRoute
+ format: int64
+ type: integer
+ referencedPool:
+ description: ReferencedPool indicates the name of the referenced IntelligentPool
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/src/semantic-router/examples/decision-based-routing.yaml b/src/semantic-router/examples/decision-based-routing.yaml
new file mode 100644
index 000000000..dde940f68
--- /dev/null
+++ b/src/semantic-router/examples/decision-based-routing.yaml
@@ -0,0 +1,176 @@
+# Example configuration demonstrating Decision-based routing with AND/OR rule combinations
+# This is the new architecture that separates Categories (metadata) from Decisions (routing logic)
+
+default_model: "general-model"
+
+# Categories define domain metadata only (no routing logic)
+categories:
+ - name: "coding"
+ description: "Programming and software development tasks"
+ mmlu_categories:
+ - "computer_science"
+ - "programming"
+
+ - name: "math"
+ description: "Mathematical problems and calculations"
+ mmlu_categories:
+ - "mathematics"
+ - "statistics"
+
+ - name: "business"
+ description: "Business and management topics"
+ mmlu_categories:
+ - "business"
+ - "management"
+
+# Keyword rules for fast pattern matching
+keyword_rules:
+ - category: "coding"
+ operator: "OR"
+ keywords:
+ - "\\bcode\\b"
+ - "\\bprogramming\\b"
+ - "\\bfunction\\b"
+ - "\\balgorithm\\b"
+ case_sensitive: false
+
+ - category: "math"
+ operator: "OR"
+ keywords:
+ - "\\bcalculate\\b"
+ - "\\bequation\\b"
+ - "\\bsolve\\b"
+ case_sensitive: false
+
+# Embedding rules for semantic similarity matching
+embedding_rules:
+ - category: "coding"
+ threshold: 0.75
+ keywords:
+ - "write a function"
+ - "implement algorithm"
+ - "debug code"
+ aggregation_method: "mean"
+
+# Decisions define routing logic with AND/OR rule combinations
+# Strategy: "priority" (select highest priority) or "confidence" (select highest confidence)
+strategy: "priority"
+
+decisions:
+ # Decision 1: Complex coding task requiring reasoning
+ - name: "complex-coding"
+ description: "Complex programming tasks that benefit from step-by-step reasoning"
+ priority: 20
+ rules:
+ operator: "AND" # All conditions must match
+ conditions:
+ - type: "keyword"
+ name: "coding"
+ - type: "domain"
+ name: "coding"
+ modelRefs:
+ - model: "deepseek-coder"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "header_mutation"
+ configuration:
+ add:
+ - name: "X-Decision-Name"
+ value: "complex-coding"
+ - name: "X-Reasoning-Enabled"
+ value: "true"
+ update:
+ - name: "User-Agent"
+ value: "semantic-router/coding"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+
+ # Decision 2: Simple coding task
+ - name: "simple-coding"
+ description: "Simple programming tasks"
+ priority: 10
+ rules:
+ operator: "OR" # Any condition matches
+ conditions:
+ - type: "keyword"
+ name: "coding"
+ - type: "embedding"
+ name: "coding"
+ modelRefs:
+ - model: "codellama"
+ use_reasoning: false
+
+ # Decision 3: Math problems requiring reasoning
+ - name: "math-reasoning"
+ description: "Mathematical problems that benefit from reasoning"
+ priority: 15
+ rules:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "math"
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "deepseek-math"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.8
+
+ # Decision 4: Business queries (no reasoning needed)
+ - name: "business-query"
+ description: "Business and management queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "general-model"
+ use_reasoning: false
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.9
+
+# Model configuration
+model_config:
+ "deepseek-coder":
+ reasoning_family: "deepseek"
+ preferred_endpoints: ["endpoint1"]
+
+ "deepseek-math":
+ reasoning_family: "deepseek"
+ preferred_endpoints: ["endpoint1"]
+
+ "codellama":
+ preferred_endpoints: ["endpoint2"]
+
+ "general-model":
+ preferred_endpoints: ["endpoint1", "endpoint2"]
+
+# vLLM endpoints
+vllm_endpoints:
+ - name: "endpoint1"
+ address: "127.0.0.1"
+ port: 8000
+ weight: 1
+
+ - name: "endpoint2"
+ address: "127.0.0.1"
+ port: 8001
+ weight: 1
+
diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod
index 9e9b869f1..28d296e0f 100644
--- a/src/semantic-router/go.mod
+++ b/src/semantic-router/go.mod
@@ -21,7 +21,6 @@ require (
github.com/openai/openai-go v1.12.0
github.com/prometheus/client_golang v1.23.0
github.com/prometheus/client_model v0.6.2
- github.com/samber/lo v1.52.0
github.com/stretchr/testify v1.11.1
github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
go.opentelemetry.io/otel v1.38.0
@@ -34,7 +33,9 @@ require (
google.golang.org/grpc v1.75.0
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
- k8s.io/apimachinery v0.31.4
+ k8s.io/apimachinery v0.34.2
+ k8s.io/client-go v0.34.2
+ sigs.k8s.io/controller-runtime v0.19.4
sigs.k8s.io/yaml v1.6.0
)
@@ -49,21 +50,27 @@ require (
github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
github.com/cockroachdb/redact v1.1.3 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+ github.com/emicklei/go-restful/v3 v3.12.2 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
- github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+ github.com/evanphx/json-patch/v5 v5.9.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/getsentry/sentry-go v0.12.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/go-openapi/jsonpointer v0.21.0 // indirect
+ github.com/go-openapi/jsonreference v0.20.2 // indirect
+ github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
+ github.com/google/gnostic-models v0.7.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
- github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
github.com/invopop/jsonschema v0.13.0 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
@@ -71,7 +78,7 @@ require (
github.com/mailru/easyjson v0.7.7 // indirect
github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
- github.com/modern-go/reflect2 v1.0.2 // indirect
+ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
@@ -80,6 +87,7 @@ require (
github.com/prometheus/procfs v0.16.1 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/cast v1.7.1 // indirect
+ github.com/spf13/pflag v1.0.6 // indirect
github.com/tidwall/gjson v1.14.4 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
@@ -94,16 +102,26 @@ require (
go.uber.org/automaxprocs v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
+ go.yaml.in/yaml/v3 v3.0.4 // indirect
+ golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect
golang.org/x/net v0.43.0 // indirect
+ golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/sync v0.16.0 // indirect
+ golang.org/x/term v0.34.0 // indirect
golang.org/x/text v0.28.0 // indirect
+ golang.org/x/time v0.9.0 // indirect
golang.org/x/tools v0.35.0 // indirect
+ gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
google.golang.org/protobuf v1.36.9 // indirect
+ gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
+ k8s.io/api v0.34.2 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
- k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
- sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
- sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
+ k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
+ k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
+ sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
+ sigs.k8s.io/randfill v1.0.0 // indirect
+ sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
)
diff --git a/src/semantic-router/go.sum b/src/semantic-router/go.sum
index 3e25a791b..7ab363ef9 100644
--- a/src/semantic-router/go.sum
+++ b/src/semantic-router/go.sum
@@ -45,6 +45,8 @@ github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6ps
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
+github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
+github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -55,6 +57,10 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
+github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
+github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
+github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
+github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
@@ -62,8 +68,8 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
-github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
-github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc=
github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk=
github.com/getsentry/sentry-go v0.12.0/go.mod h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c=
@@ -81,7 +87,17 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
+github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
@@ -113,13 +129,14 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4=
+github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
+github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
@@ -150,6 +167,7 @@ github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/
github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk=
github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g=
github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -172,6 +190,7 @@ github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
@@ -210,8 +229,9 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
-github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
@@ -259,8 +279,6 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
-github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
-github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
@@ -274,11 +292,13 @@ github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cA
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
-github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
-github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
@@ -286,6 +306,9 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
@@ -369,6 +392,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc h1:mCRnTeVUjcrhlRmO0VK8a6k6Rrf6TF9htwo2pJVSjIU=
+golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
@@ -397,6 +422,8 @@ golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qx
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
+golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -431,6 +458,8 @@ golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
+golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -440,6 +469,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
+golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -459,6 +490,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
+gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
@@ -503,6 +536,8 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE=
gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y=
@@ -513,7 +548,6 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3M
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -523,15 +557,27 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
-k8s.io/apimachinery v0.31.4 h1:8xjE2C4CzhYVm9DGf60yohpNUh5AEBnPxCryPBECmlM=
-k8s.io/apimachinery v0.31.4/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/api v0.34.2 h1:fsSUNZhV+bnL6Aqrp6O7lMTy6o5x2C4XLjnh//8SLYY=
+k8s.io/api v0.34.2/go.mod h1:MMBPaWlED2a8w4RSeanD76f7opUoypY8TFYkSM+3XHw=
+k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk=
+k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk=
+k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4=
+k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
+k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M=
+k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
-k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
-k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
-sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
-sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
-sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
+k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
+k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/controller-runtime v0.19.4 h1:SUmheabttt0nx8uJtoII4oIP27BVVvAKFvdvGFwV/Qo=
+sigs.k8s.io/controller-runtime v0.19.4/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go
deleted file mode 100644
index fef12657d..000000000
--- a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
-Copyright 2025 vLLM Semantic Router.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1alpha1
-
-import (
- "encoding/json"
- "fmt"
-
- "k8s.io/apimachinery/pkg/runtime"
-)
-
-// FilterConfigHelper provides helper methods for working with filter configurations
-type FilterConfigHelper struct{}
-
-// NewFilterConfigHelper creates a new FilterConfigHelper
-func NewFilterConfigHelper() *FilterConfigHelper {
- return &FilterConfigHelper{}
-}
-
-// MarshalFilterConfig marshals a filter configuration to RawExtension
-func (h *FilterConfigHelper) MarshalFilterConfig(config interface{}) (*runtime.RawExtension, error) {
- if config == nil {
- return nil, nil
- }
-
- data, err := json.Marshal(config)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal filter config: %w", err)
- }
-
- return &runtime.RawExtension{Raw: data}, nil
-}
-
-// UnmarshalPIIDetectionConfig unmarshals a PIIDetectionConfig from RawExtension
-func (h *FilterConfigHelper) UnmarshalPIIDetectionConfig(raw *runtime.RawExtension) (*PIIDetectionConfig, error) {
- if raw == nil || len(raw.Raw) == 0 {
- return &PIIDetectionConfig{}, nil
- }
-
- var config PIIDetectionConfig
- if err := json.Unmarshal(raw.Raw, &config); err != nil {
- return nil, fmt.Errorf("failed to unmarshal PIIDetectionConfig: %w", err)
- }
-
- return &config, nil
-}
-
-// UnmarshalPromptGuardConfig unmarshals a PromptGuardConfig from RawExtension
-func (h *FilterConfigHelper) UnmarshalPromptGuardConfig(raw *runtime.RawExtension) (*PromptGuardConfig, error) {
- if raw == nil || len(raw.Raw) == 0 {
- return &PromptGuardConfig{}, nil
- }
-
- var config PromptGuardConfig
- if err := json.Unmarshal(raw.Raw, &config); err != nil {
- return nil, fmt.Errorf("failed to unmarshal PromptGuardConfig: %w", err)
- }
-
- return &config, nil
-}
-
-// UnmarshalSemanticCacheConfig unmarshals a SemanticCacheConfig from RawExtension
-func (h *FilterConfigHelper) UnmarshalSemanticCacheConfig(raw *runtime.RawExtension) (*SemanticCacheConfig, error) {
- if raw == nil || len(raw.Raw) == 0 {
- return &SemanticCacheConfig{}, nil
- }
-
- var config SemanticCacheConfig
- if err := json.Unmarshal(raw.Raw, &config); err != nil {
- return nil, fmt.Errorf("failed to unmarshal SemanticCacheConfig: %w", err)
- }
-
- return &config, nil
-}
-
-// UnmarshalReasoningControlConfig unmarshals a ReasoningControlConfig from RawExtension
-func (h *FilterConfigHelper) UnmarshalReasoningControlConfig(raw *runtime.RawExtension) (*ReasoningControlConfig, error) {
- if raw == nil || len(raw.Raw) == 0 {
- return &ReasoningControlConfig{}, nil
- }
-
- var config ReasoningControlConfig
- if err := json.Unmarshal(raw.Raw, &config); err != nil {
- return nil, fmt.Errorf("failed to unmarshal ReasoningControlConfig: %w", err)
- }
-
- return &config, nil
-}
-
-// MarshalToolSelectionConfig marshals a ToolSelectionConfig to RawExtension
-func (h *FilterConfigHelper) MarshalToolSelectionConfig(config *ToolSelectionConfig) (*runtime.RawExtension, error) {
- if config == nil {
- return &runtime.RawExtension{}, nil
- }
-
- data, err := json.Marshal(config)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal ToolSelectionConfig: %w", err)
- }
-
- return &runtime.RawExtension{Raw: data}, nil
-}
-
-// UnmarshalToolSelectionConfig unmarshals a ToolSelectionConfig from RawExtension
-func (h *FilterConfigHelper) UnmarshalToolSelectionConfig(raw *runtime.RawExtension) (*ToolSelectionConfig, error) {
- if raw == nil || len(raw.Raw) == 0 {
- return &ToolSelectionConfig{}, nil
- }
-
- var config ToolSelectionConfig
- if err := json.Unmarshal(raw.Raw, &config); err != nil {
- return nil, fmt.Errorf("failed to unmarshal ToolSelectionConfig: %w", err)
- }
-
- return &config, nil
-}
-
-// UnmarshalFilterConfig unmarshals a filter configuration based on the filter type
-func (h *FilterConfigHelper) UnmarshalFilterConfig(filterType FilterType, raw *runtime.RawExtension) (interface{}, error) {
- switch filterType {
- case FilterTypePIIDetection:
- return h.UnmarshalPIIDetectionConfig(raw)
- case FilterTypePromptGuard:
- return h.UnmarshalPromptGuardConfig(raw)
- case FilterTypeSemanticCache:
- return h.UnmarshalSemanticCacheConfig(raw)
- case FilterTypeReasoningControl:
- return h.UnmarshalReasoningControlConfig(raw)
- case FilterTypeToolSelection:
- return h.UnmarshalToolSelectionConfig(raw)
- default:
- return nil, fmt.Errorf("unsupported filter type: %s", filterType)
- }
-}
-
-// ValidateFilterConfig validates a filter configuration
-func (h *FilterConfigHelper) ValidateFilterConfig(filter *Filter) error {
- if filter == nil {
- return fmt.Errorf("filter cannot be nil")
- }
-
- // Validate filter type
- switch filter.Type {
- case FilterTypePIIDetection, FilterTypePromptGuard, FilterTypeSemanticCache, FilterTypeReasoningControl, FilterTypeToolSelection:
- // Valid filter types
- default:
- return fmt.Errorf("invalid filter type: %s", filter.Type)
- }
-
- // If config is provided, try to unmarshal it to validate structure
- if filter.Config != nil {
- _, err := h.UnmarshalFilterConfig(filter.Type, filter.Config)
- if err != nil {
- return fmt.Errorf("invalid filter config for type %s: %w", filter.Type, err)
- }
- }
-
- return nil
-}
-
-// CreatePIIDetectionFilter creates a PIIDetection filter with the given configuration
-func CreatePIIDetectionFilter(config *PIIDetectionConfig) (*Filter, error) {
- helper := NewFilterConfigHelper()
- rawConfig, err := helper.MarshalFilterConfig(config)
- if err != nil {
- return nil, err
- }
-
- enabled := true
- return &Filter{
- Type: FilterTypePIIDetection,
- Config: rawConfig,
- Enabled: &enabled,
- }, nil
-}
-
-// CreatePromptGuardFilter creates a PromptGuard filter with the given configuration
-func CreatePromptGuardFilter(config *PromptGuardConfig) (*Filter, error) {
- helper := NewFilterConfigHelper()
- rawConfig, err := helper.MarshalFilterConfig(config)
- if err != nil {
- return nil, err
- }
-
- enabled := true
- return &Filter{
- Type: FilterTypePromptGuard,
- Config: rawConfig,
- Enabled: &enabled,
- }, nil
-}
-
-// CreateSemanticCacheFilter creates a SemanticCache filter with the given configuration
-func CreateSemanticCacheFilter(config *SemanticCacheConfig) (*Filter, error) {
- helper := NewFilterConfigHelper()
- rawConfig, err := helper.MarshalFilterConfig(config)
- if err != nil {
- return nil, err
- }
-
- enabled := true
- return &Filter{
- Type: FilterTypeSemanticCache,
- Config: rawConfig,
- Enabled: &enabled,
- }, nil
-}
-
-// CreateReasoningControlFilter creates a ReasoningControl filter with the given configuration
-func CreateReasoningControlFilter(config *ReasoningControlConfig) (*Filter, error) {
- helper := NewFilterConfigHelper()
- rawConfig, err := helper.MarshalFilterConfig(config)
- if err != nil {
- return nil, err
- }
-
- enabled := true
- return &Filter{
- Type: FilterTypeReasoningControl,
- Config: rawConfig,
- Enabled: &enabled,
- }, nil
-}
-
-// CreateToolSelectionFilter creates a ToolSelection filter with the given configuration
-func CreateToolSelectionFilter(config *ToolSelectionConfig) (*Filter, error) {
- helper := NewFilterConfigHelper()
- rawConfig, err := helper.MarshalFilterConfig(config)
- if err != nil {
- return nil, err
- }
-
- enabled := true
- return &Filter{
- Type: FilterTypeToolSelection,
- Config: rawConfig,
- Enabled: &enabled,
- }, nil
-}
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go
deleted file mode 100644
index d5689405f..000000000
--- a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
-Copyright 2025 vLLM Semantic Router.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1alpha1
-
-// PIIDetectionConfig defines the configuration for PII detection filter
-type PIIDetectionConfig struct {
- // AllowByDefault defines whether PII is allowed by default
- // +optional
- // +kubebuilder:default=false
- AllowByDefault *bool `json:"allowByDefault,omitempty"`
-
- // PIITypesAllowed defines the list of PII types that are allowed
- // +optional
- // +kubebuilder:validation:MaxItems=50
- PIITypesAllowed []string `json:"pii_types_allowed,omitempty"`
-
- // Threshold defines the confidence threshold for PII detection (0.0-1.0)
- // +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1
- // +kubebuilder:default=0.7
- Threshold *float64 `json:"threshold,omitempty"`
-
- // Action defines what to do when PII is detected
- // +optional
- // +kubebuilder:validation:Enum=block;mask;allow
- // +kubebuilder:default=block
- Action *string `json:"action,omitempty"`
-}
-
-// PromptGuardConfig defines the configuration for prompt guard filter
-type PromptGuardConfig struct {
- // Threshold defines the confidence threshold for jailbreak detection (0.0-1.0)
- // +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1
- // +kubebuilder:default=0.7
- Threshold *float64 `json:"threshold,omitempty"`
-
- // Action defines what to do when a jailbreak attempt is detected
- // +optional
- // +kubebuilder:validation:Enum=block;warn;allow
- // +kubebuilder:default=block
- Action *string `json:"action,omitempty"`
-
- // CustomRules defines additional custom security rules
- // +optional
- // +kubebuilder:validation:MaxItems=100
- CustomRules []SecurityRule `json:"customRules,omitempty"`
-}
-
-// SecurityRule defines a custom security rule
-type SecurityRule struct {
- // Name defines the name of the security rule
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:MinLength=1
- // +kubebuilder:validation:MaxLength=100
- Name string `json:"name"`
-
- // Pattern defines the regex pattern to match
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:MinLength=1
- // +kubebuilder:validation:MaxLength=1000
- Pattern string `json:"pattern"`
-
- // Action defines the action to take when this rule matches
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:Enum=block;warn;allow
- Action string `json:"action"`
-
- // Description provides an optional description of this rule
- // +optional
- // +kubebuilder:validation:MaxLength=500
- Description string `json:"description,omitempty"`
-}
-
-// SemanticCacheConfig defines the configuration for semantic cache filter
-type SemanticCacheConfig struct {
- // SimilarityThreshold defines the similarity threshold for cache hits (0.0-1.0)
- // +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1
- // +kubebuilder:default=0.8
- SimilarityThreshold *float64 `json:"similarityThreshold,omitempty"`
-
- // MaxEntries defines the maximum number of cache entries
- // +optional
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=1000000
- // +kubebuilder:default=1000
- MaxEntries *int32 `json:"maxEntries,omitempty"`
-
- // TTLSeconds defines the time-to-live for cache entries in seconds
- // +optional
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=86400
- // +kubebuilder:default=3600
- TTLSeconds *int32 `json:"ttlSeconds,omitempty"`
-
- // Backend defines the cache backend type
- // +optional
- // +kubebuilder:validation:Enum=memory;redis;milvus
- // +kubebuilder:default=memory
- Backend *string `json:"backend,omitempty"`
-
- // EmbeddingModel defines which embedding model to use for semantic similarity
- // +optional
- // +kubebuilder:validation:Enum=bert;qwen3;gemma
- // +kubebuilder:default=bert
- EmbeddingModel *string `json:"embeddingModel,omitempty"`
-
- // BackendConfig defines backend-specific configuration
- // +optional
- BackendConfig map[string]string `json:"backendConfig,omitempty"`
-}
-
-// ReasoningControlConfig defines the configuration for reasoning control filter
-type ReasoningControlConfig struct {
- // ReasonFamily defines the reasoning family to use
- // +optional
- // +kubebuilder:validation:Enum=gpt-oss;deepseek;qwen3;claude
- ReasonFamily *string `json:"reasonFamily,omitempty"`
-
- // EnableReasoning defines whether reasoning mode is enabled
- // +optional
- // +kubebuilder:default=true
- EnableReasoning *bool `json:"enableReasoning,omitempty"`
-
- // ReasoningEffort defines the reasoning effort level
- // +optional
- // +kubebuilder:validation:Enum=low;medium;high
- // +kubebuilder:default=medium
- ReasoningEffort *string `json:"reasoningEffort,omitempty"`
-
- // MaxReasoningSteps defines the maximum number of reasoning steps
- // +optional
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=100
- // +kubebuilder:default=10
- MaxReasoningSteps *int32 `json:"maxReasoningSteps,omitempty"`
-
- // ReasoningTimeout defines the timeout for reasoning in seconds
- // +optional
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=300
- // +kubebuilder:default=30
- ReasoningTimeout *int32 `json:"reasoningTimeout,omitempty"`
-}
-
-// ToolSelectionConfig defines the configuration for automatic tool selection filter
-type ToolSelectionConfig struct {
- // TopK defines the number of top tools to select based on similarity
- // +optional
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=20
- // +kubebuilder:default=3
- TopK *int32 `json:"topK,omitempty"`
-
- // SimilarityThreshold defines the similarity threshold for tool selection (0.0-1.0)
- // +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1
- // +kubebuilder:default=0.2
- SimilarityThreshold *float64 `json:"similarityThreshold,omitempty"`
-
- // ToolsDBPath defines the path to the tools database file
- // +optional
- // +kubebuilder:default="config/tools_db.json"
- ToolsDBPath *string `json:"toolsDBPath,omitempty"`
-
- // FallbackToEmpty defines whether to return empty tools on failure
- // +optional
- // +kubebuilder:default=true
- FallbackToEmpty *bool `json:"fallbackToEmpty,omitempty"`
-
- // Categories defines the tool categories to include in selection
- // +optional
- // +kubebuilder:validation:MaxItems=20
- Categories []string `json:"categories,omitempty"`
-
- // Tags defines the tool tags to include in selection
- // +optional
- // +kubebuilder:validation:MaxItems=50
- Tags []string `json:"tags,omitempty"`
-}
-
-// FilterCondition defines a condition for applying filters
-type FilterCondition struct {
- // Type defines the condition type
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:Enum=Always;Never;OnMatch;OnNoMatch
- Type FilterConditionType `json:"type"`
-
- // Value defines the condition value (used with OnMatch/OnNoMatch)
- // +optional
- Value string `json:"value,omitempty"`
-}
-
-// FilterConditionType defines the supported filter condition types
-// +kubebuilder:validation:Enum=Always;Never;OnMatch;OnNoMatch
-type FilterConditionType string
-
-const (
- // FilterConditionAlways means the filter is always applied
- FilterConditionAlways FilterConditionType = "Always"
- // FilterConditionNever means the filter is never applied
- FilterConditionNever FilterConditionType = "Never"
- // FilterConditionOnMatch means the filter is applied when a condition matches
- FilterConditionOnMatch FilterConditionType = "OnMatch"
- // FilterConditionOnNoMatch means the filter is applied when a condition doesn't match
- FilterConditionOnNoMatch FilterConditionType = "OnNoMatch"
-)
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go
index 368bfb9a6..9f1b8854e 100644
--- a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go
+++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go
@@ -39,8 +39,10 @@ func Resource(resource string) schema.GroupResource {
// addKnownTypes adds the set of types defined in this package to the supplied scheme.
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(GroupVersion,
- &SemanticRoute{},
- &SemanticRouteList{},
+ &IntelligentPool{},
+ &IntelligentPoolList{},
+ &IntelligentRoute{},
+ &IntelligentRouteList{},
)
metav1.AddToGroupVersion(scheme, GroupVersion)
return nil
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go
index ba4ba0b5d..6707c6be2 100644
--- a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go
+++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go
@@ -18,162 +18,127 @@ package v1alpha1
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/runtime"
)
-// SemanticRoute defines a semantic routing rule for LLM requests
+// IntelligentPool defines a pool of models with their configurations
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
-// +kubebuilder:resource:scope=Namespaced,shortName=sr
-// +kubebuilder:printcolumn:name="Rules",type="integer",JSONPath=".spec.rules",description="Number of routing rules"
+// +kubebuilder:resource:scope=Namespaced,shortName=ipool
+// +kubebuilder:printcolumn:name="Default Model",type="string",JSONPath=".spec.defaultModel",description="Default model name"
+// +kubebuilder:printcolumn:name="Models",type="integer",JSONPath=".status.modelCount",description="Number of models"
+// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status",description="Ready status"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
-type SemanticRoute struct {
- metav1.TypeMeta `json:",inline"`
- metav1.ObjectMeta `json:"metadata,omitempty"`
+type IntelligentPool struct {
+ metav1.TypeMeta `json:",inline" yaml:",inline"`
+ metav1.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"`
- Spec SemanticRouteSpec `json:"spec,omitempty"`
- Status SemanticRouteStatus `json:"status,omitempty"`
+ Spec IntelligentPoolSpec `json:"spec,omitempty" yaml:"spec,omitempty"`
+ Status IntelligentPoolStatus `json:"status,omitempty" yaml:"status,omitempty"`
}
-// SemanticRouteSpec defines the desired state of SemanticRoute
-type SemanticRouteSpec struct {
- // Rules defines the routing rules to be applied
+// IntelligentPoolSpec defines the desired state of IntelligentPool
+type IntelligentPoolSpec struct {
+ // DefaultModel specifies the default model to use when no specific model is selected
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ DefaultModel string `json:"defaultModel" yaml:"defaultModel"`
+
+ // Models defines the list of available models in this pool
+ // +kubebuilder:validation:Required
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
- Rules []RouteRule `json:"rules"`
+ Models []ModelConfig `json:"models" yaml:"models"`
}
-// SemanticRouteStatus defines the observed state of SemanticRoute
-type SemanticRouteStatus struct {
- // Conditions represent the latest available observations of the SemanticRoute's current state
- // +optional
- Conditions []metav1.Condition `json:"conditions,omitempty"`
-
- // ObservedGeneration reflects the generation of the most recently observed SemanticRoute
- // +optional
- ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+// ModelConfig defines the configuration for a single model
+type ModelConfig struct {
+ // Name is the unique identifier for this model
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
- // ActiveRules indicates the number of currently active routing rules
+ // ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek")
+ // Must be defined in the global static configuration's ReasoningFamilies
// +optional
- ActiveRules int32 `json:"activeRules,omitempty"`
-}
+ // +kubebuilder:validation:MaxLength=50
+ ReasoningFamily string `json:"reasoningFamily,omitempty" yaml:"reasoningFamily,omitempty"`
-// RouteRule defines a single routing rule
-type RouteRule struct {
- // Intents defines the intent categories that this rule should match
- // +kubebuilder:validation:MinItems=1
- // +kubebuilder:validation:MaxItems=50
- Intents []Intent `json:"intents"`
-
- // ModelRefs defines the target models for this routing rule
- // +kubebuilder:validation:MinItems=1
- // +kubebuilder:validation:MaxItems=10
- ModelRefs []ModelRef `json:"modelRefs"`
-
- // Filters defines the optional filters to be applied to requests matching this rule
+ // Pricing defines the cost structure for this model
// +optional
- // +kubebuilder:validation:MaxItems=20
- Filters []Filter `json:"filters,omitempty"`
+ Pricing *ModelPricing `json:"pricing,omitempty" yaml:"pricing,omitempty"`
- // DefaultModel defines the fallback model if no modelRefs are available
+ // LoRAs defines the list of LoRA adapters available for this model
// +optional
- DefaultModel *ModelRef `json:"defaultModel,omitempty"`
+ // +kubebuilder:validation:MaxItems=50
+ LoRAs []LoRAConfig `json:"loras,omitempty" yaml:"loras,omitempty"`
}
-// Intent defines an intent category for routing
-type Intent struct {
- // Category defines the intent category name (e.g., "math", "computer science", "creative")
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:MinLength=1
- // +kubebuilder:validation:MaxLength=100
- // +kubebuilder:validation:Pattern=^[a-zA-Z0-9\s\-_]+$
- Category string `json:"category"`
-
- // Description provides an optional description of this intent category
+// ModelPricing defines the pricing structure for a model
+type ModelPricing struct {
+ // InputTokenPrice is the cost per input token
// +optional
- // +kubebuilder:validation:MaxLength=500
- Description string `json:"description,omitempty"`
+ // +kubebuilder:validation:Minimum=0
+ InputTokenPrice float64 `json:"inputTokenPrice,omitempty" yaml:"inputTokenPrice,omitempty"`
- // Threshold defines the confidence threshold for this intent (0.0-1.0)
+ // OutputTokenPrice is the cost per output token
// +optional
// +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1
- // +kubebuilder:default=0.7
- Threshold *float64 `json:"threshold,omitempty"`
+ OutputTokenPrice float64 `json:"outputTokenPrice,omitempty" yaml:"outputTokenPrice,omitempty"`
}
-// ModelRef defines a reference to a model endpoint
-type ModelRef struct {
- // ModelName defines the name of the model
+// LoRAConfig defines a LoRA adapter configuration
+type LoRAConfig struct {
+ // Name is the unique identifier for this LoRA adapter
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=100
- ModelName string `json:"modelName"`
-
- // Address defines the endpoint address
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:MinLength=1
- // +kubebuilder:validation:MaxLength=255
- Address string `json:"address"`
-
- // Port defines the endpoint port
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:Minimum=1
- // +kubebuilder:validation:Maximum=65535
- Port int32 `json:"port"`
+ Name string `json:"name" yaml:"name"`
- // Weight defines the traffic weight for this model (0-100)
+ // Description provides a human-readable description of this LoRA adapter
// +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=100
- // +kubebuilder:default=100
- Weight *int32 `json:"weight,omitempty"`
-
- // Priority defines the priority of this model reference (higher values = higher priority)
- // +optional
- // +kubebuilder:validation:Minimum=0
- // +kubebuilder:validation:Maximum=1000
- Priority *int32 `json:"priority,omitempty"`
+ // +kubebuilder:validation:MaxLength=500
+ Description string `json:"description,omitempty" yaml:"description,omitempty"`
}
-// Filter defines a filter to be applied to requests
-type Filter struct {
- // Type defines the filter type
- // +kubebuilder:validation:Required
- // +kubebuilder:validation:Enum=PIIDetection;PromptGuard;SemanticCache;ReasoningControl
- Type FilterType `json:"type"`
+// IntelligentPoolStatus defines the observed state of IntelligentPool
+type IntelligentPoolStatus struct {
+ // Conditions represent the latest available observations of the IntelligentPool's state
+ // +optional
+ Conditions []metav1.Condition `json:"conditions,omitempty"`
- // Config defines the filter-specific configuration
+ // ObservedGeneration reflects the generation of the most recently observed IntelligentPool
// +optional
- Config *runtime.RawExtension `json:"config,omitempty"`
+ ObservedGeneration int64 `json:"observedGeneration,omitempty"`
- // Enabled defines whether this filter is enabled
+ // ModelCount indicates the number of models in the pool
// +optional
- // +kubebuilder:default=true
- Enabled *bool `json:"enabled,omitempty"`
+ ModelCount int32 `json:"modelCount,omitempty"`
}
-// FilterType defines the supported filter types
-// +kubebuilder:validation:Enum=PIIDetection;PromptGuard;SemanticCache;ReasoningControl;ToolSelection
-type FilterType string
-
-const (
- // FilterTypePIIDetection enables PII detection and filtering
- FilterTypePIIDetection FilterType = "PIIDetection"
- // FilterTypePromptGuard enables prompt security and jailbreak detection
- FilterTypePromptGuard FilterType = "PromptGuard"
- // FilterTypeSemanticCache enables semantic caching for performance optimization
- FilterTypeSemanticCache FilterType = "SemanticCache"
- // FilterTypeReasoningControl enables reasoning mode control
- FilterTypeReasoningControl FilterType = "ReasoningControl"
- // FilterTypeToolSelection enables automatic tool selection based on semantic similarity
- FilterTypeToolSelection FilterType = "ToolSelection"
-)
-
-// SemanticRouteList contains a list of SemanticRoute
+// IntelligentPoolList contains a list of IntelligentPool
// +kubebuilder:object:root=true
-type SemanticRouteList struct {
+type IntelligentPoolList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
- Items []SemanticRoute `json:"items"`
+ Items []IntelligentPool `json:"items"`
+}
+
+// IntelligentRoute defines intelligent routing rules and decisions
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Namespaced,shortName=iroute
+// +kubebuilder:printcolumn:name="Decisions",type="integer",JSONPath=".status.statistics.decisions",description="Number of decisions"
+// +kubebuilder:printcolumn:name="Keywords",type="integer",JSONPath=".status.statistics.keywords",description="Number of keyword signals"
+// +kubebuilder:printcolumn:name="Embeddings",type="integer",JSONPath=".status.statistics.embeddings",description="Number of embedding signals"
+// +kubebuilder:printcolumn:name="Domains",type="integer",JSONPath=".status.statistics.domains",description="Number of domain signals"
+// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status",description="Ready status"
+// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
+type IntelligentRoute struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ObjectMeta `json:"metadata,omitempty"`
+
+ Spec IntelligentRouteSpec `json:"spec,omitempty"`
+ Status IntelligentRouteStatus `json:"status,omitempty"`
}
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types_route.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types_route.go
new file mode 100644
index 000000000..7bc81854e
--- /dev/null
+++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types_route.go
@@ -0,0 +1,301 @@
+/*
+Copyright 2025 vLLM Semantic Router.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+)
+
+// IntelligentRouteSpec defines the desired state of IntelligentRoute
+type IntelligentRouteSpec struct {
+ // Signals defines signal extraction rules for routing decisions
+ // +optional
+ Signals Signals `json:"signals,omitempty" yaml:"signals,omitempty"`
+
+ // Decisions defines the routing decisions based on signal combinations
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinItems=1
+ // +kubebuilder:validation:MaxItems=100
+ Decisions []Decision `json:"decisions" yaml:"decisions"`
+}
+
+// Signals defines signal extraction rules
+type Signals struct {
+ // Keywords defines keyword-based signal extraction rules
+ // +optional
+ // +kubebuilder:validation:MaxItems=100
+ Keywords []KeywordSignal `json:"keywords,omitempty" yaml:"keywords,omitempty"`
+
+ // Embeddings defines embedding-based signal extraction rules
+ // +optional
+ // +kubebuilder:validation:MaxItems=100
+ Embeddings []EmbeddingSignal `json:"embeddings,omitempty" yaml:"embeddings,omitempty"`
+
+ // Domains defines MMLU domain categories for classification
+ // +optional
+ // +kubebuilder:validation:MaxItems=14
+ Domains []DomainSignal `json:"domains,omitempty" yaml:"domains,omitempty"`
+}
+
+// DomainSignal defines a domain category for classification
+type DomainSignal struct {
+ // Name is the unique identifier for this domain
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
+
+ // Description provides a human-readable description of this domain
+ // +optional
+ // +kubebuilder:validation:MaxLength=500
+ Description string `json:"description,omitempty" yaml:"description,omitempty"`
+}
+
+// KeywordSignal defines a keyword-based signal extraction rule
+type KeywordSignal struct {
+ // Name is the unique identifier for this rule (also used as category name)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
+
+ // Operator defines the logical operator for keywords (AND/OR)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Enum=AND;OR
+ Operator string `json:"operator" yaml:"operator"`
+
+ // Keywords is the list of keywords to match
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinItems=1
+ // +kubebuilder:validation:MaxItems=100
+ Keywords []string `json:"keywords" yaml:"keywords"`
+
+ // CaseSensitive specifies whether keyword matching is case-sensitive
+ // +optional
+ // +kubebuilder:default=false
+ CaseSensitive bool `json:"caseSensitive" yaml:"caseSensitive"`
+}
+
+// EmbeddingSignal defines an embedding-based signal extraction rule
+type EmbeddingSignal struct {
+ // Name is the unique identifier for this signal
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
+
+ // Threshold is the similarity threshold for matching (0.0-1.0)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Minimum=0
+ // +kubebuilder:validation:Maximum=1
+ Threshold float32 `json:"threshold" yaml:"threshold"`
+
+ // Candidates is the list of candidate phrases for semantic matching
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinItems=1
+ // +kubebuilder:validation:MaxItems=100
+ Candidates []string `json:"candidates" yaml:"candidates"`
+
+ // AggregationMethod defines how to aggregate multiple candidate similarities (mean/max/any)
+ // +optional
+ // +kubebuilder:validation:Enum=mean;max;any
+ // +kubebuilder:default=max
+ AggregationMethod string `json:"aggregationMethod,omitempty" yaml:"aggregationMethod,omitempty"`
+}
+
+// Decision defines a routing decision based on rule combinations
+type Decision struct {
+ // Name is the unique identifier for this decision
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
+
+ // Priority defines the priority of this decision (higher values = higher priority)
+ // Used when strategy is "priority"
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ // +kubebuilder:validation:Maximum=1000
+ // +kubebuilder:default=0
+ Priority int32 `json:"priority" yaml:"priority"`
+
+ // Description provides a human-readable description of this decision
+ // +optional
+ // +kubebuilder:validation:MaxLength=500
+ Description string `json:"description,omitempty" yaml:"description,omitempty"`
+
+ // Signals defines the signal combination logic
+ // +kubebuilder:validation:Required
+ Signals SignalCombination `json:"signals" yaml:"signals"`
+
+ // ModelRefs defines the model references for this decision (currently only one model is supported)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinItems=1
+ // +kubebuilder:validation:MaxItems=1
+ ModelRefs []ModelRef `json:"modelRefs" yaml:"modelRefs"`
+
+ // Plugins defines the plugins to apply for this decision
+ // +optional
+ // +kubebuilder:validation:MaxItems=10
+ Plugins []DecisionPlugin `json:"plugins,omitempty" yaml:"plugins,omitempty"`
+}
+
+// SignalCombination defines how to combine multiple signals
+type SignalCombination struct {
+ // Operator defines the logical operator for combining conditions (AND/OR)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Enum=AND;OR
+ Operator string `json:"operator" yaml:"operator"`
+
+ // Conditions defines the list of signal conditions
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinItems=1
+ // +kubebuilder:validation:MaxItems=50
+ Conditions []SignalCondition `json:"conditions" yaml:"conditions"`
+}
+
+// SignalCondition defines a single signal condition
+type SignalCondition struct {
+ // Type defines the type of signal (keyword/embedding/domain)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Enum=keyword;embedding;domain
+ Type string `json:"type" yaml:"type"`
+
+ // Name is the name of the signal to reference
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Name string `json:"name" yaml:"name"`
+}
+
+// ModelRef defines a model reference without score
+type ModelRef struct {
+ // Model is the name of the model (must exist in IntelligentPool)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Model string `json:"model" yaml:"model"`
+
+ // LoRAName is the name of the LoRA adapter to use (must exist in the model's LoRAs)
+ // +optional
+ // +kubebuilder:validation:MaxLength=100
+ LoRAName string `json:"loraName,omitempty" yaml:"loraName,omitempty"`
+
+ // UseReasoning specifies whether to enable reasoning mode for this model
+ // +optional
+ // +kubebuilder:default=false
+ UseReasoning bool `json:"useReasoning" yaml:"useReasoning"`
+
+ // ReasoningDescription provides context for when to use reasoning
+ // +optional
+ // +kubebuilder:validation:MaxLength=500
+ ReasoningDescription string `json:"reasoningDescription,omitempty" yaml:"reasoningDescription,omitempty"`
+
+ // ReasoningEffort defines the reasoning effort level (low/medium/high)
+ // +optional
+ // +kubebuilder:validation:Enum=low;medium;high
+ ReasoningEffort string `json:"reasoningEffort,omitempty" yaml:"reasoningEffort,omitempty"`
+}
+
+// DecisionPlugin defines a plugin configuration for a decision
+type DecisionPlugin struct {
+ // Type is the plugin type (semantic-cache, jailbreak, pii, system_prompt, header_mutation)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Enum=semantic-cache;jailbreak;pii;system_prompt;header_mutation
+ Type string `json:"type" yaml:"type"`
+
+ // Configuration is the plugin-specific configuration as a raw JSON object
+ // +optional
+ // +kubebuilder:pruning:PreserveUnknownFields
+ // +kubebuilder:validation:Schemaless
+ Configuration *runtime.RawExtension `json:"configuration,omitempty" yaml:"configuration,omitempty"`
+}
+
+// ModelScore defines the model selection score (deprecated, use ModelRef instead)
+// This type is kept for backward compatibility with existing CRDs
+type ModelScore struct {
+ // Model is the name of the model (must exist in IntelligentPool)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=100
+ Model string `json:"model" yaml:"model"`
+
+ // Score is the selection score for this model (0.0-1.0)
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Minimum=0
+ // +kubebuilder:validation:Maximum=1
+ Score float32 `json:"score" yaml:"score"`
+
+ // LoRAName is the name of the LoRA adapter to use (must exist in the model's LoRAs)
+ // +optional
+ // +kubebuilder:validation:MaxLength=100
+ LoRAName string `json:"loraName,omitempty" yaml:"loraName,omitempty"`
+
+ // UseReasoning specifies whether to enable reasoning mode for this model
+ // +optional
+ // +kubebuilder:default=false
+ UseReasoning bool `json:"useReasoning" yaml:"useReasoning"`
+
+ // ReasoningDescription provides context for when to use reasoning
+ // +optional
+ // +kubebuilder:validation:MaxLength=500
+ ReasoningDescription string `json:"reasoningDescription,omitempty" yaml:"reasoningDescription,omitempty"`
+
+ // ReasoningEffort defines the reasoning effort level (low/medium/high)
+ // +optional
+ // +kubebuilder:validation:Enum=low;medium;high
+ ReasoningEffort string `json:"reasoningEffort,omitempty" yaml:"reasoningEffort,omitempty"`
+}
+
+// IntelligentRouteStatus defines the observed state of IntelligentRoute
+type IntelligentRouteStatus struct {
+ // Conditions represent the latest available observations of the IntelligentRoute's state
+ // +optional
+ Conditions []metav1.Condition `json:"conditions,omitempty" yaml:"conditions,omitempty"`
+
+ // ObservedGeneration reflects the generation of the most recently observed IntelligentRoute
+ // +optional
+ ObservedGeneration int64 `json:"observedGeneration,omitempty" yaml:"observedGeneration,omitempty"`
+
+ // Statistics provides statistics about configured decisions and signals
+ // +optional
+ Statistics *RouteStatistics `json:"statistics,omitempty" yaml:"statistics,omitempty"`
+}
+
+// RouteStatistics provides statistics about the IntelligentRoute configuration
+type RouteStatistics struct {
+ // Decisions indicates the number of decisions
+ Decisions int32 `json:"decisions" yaml:"decisions"`
+
+ // Keywords indicates the number of keyword signals
+ Keywords int32 `json:"keywords" yaml:"keywords"`
+
+ // Embeddings indicates the number of embedding signals
+ Embeddings int32 `json:"embeddings" yaml:"embeddings"`
+
+ // Domains indicates the number of domain signals
+ Domains int32 `json:"domains" yaml:"domains"`
+}
+
+// IntelligentRouteList contains a list of IntelligentRoute
+// +kubebuilder:object:root=true
+type IntelligentRouteList struct {
+ metav1.TypeMeta `json:",inline" yaml:",inline"`
+ metav1.ListMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"`
+ Items []IntelligentRoute `json:"items" yaml:"items"`
+}
diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go
index d4aab7df4..49601221d 100644
--- a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go
+++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go
@@ -26,452 +26,485 @@ import (
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Filter) DeepCopyInto(out *Filter) {
+func (in *Decision) DeepCopyInto(out *Decision) {
*out = *in
- if in.Config != nil {
- in, out := &in.Config, &out.Config
- *out = new(runtime.RawExtension)
- (*in).DeepCopyInto(*out)
+ in.Signals.DeepCopyInto(&out.Signals)
+ if in.ModelRefs != nil {
+ in, out := &in.ModelRefs, &out.ModelRefs
+ *out = make([]ModelRef, len(*in))
+ copy(*out, *in)
}
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
+ if in.Plugins != nil {
+ in, out := &in.Plugins, &out.Plugins
+ *out = make([]DecisionPlugin, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Filter.
-func (in *Filter) DeepCopy() *Filter {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Decision.
+func (in *Decision) DeepCopy() *Decision {
if in == nil {
return nil
}
- out := new(Filter)
+ out := new(Decision)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *FilterCondition) DeepCopyInto(out *FilterCondition) {
+func (in *DecisionPlugin) DeepCopyInto(out *DecisionPlugin) {
*out = *in
+ if in.Configuration != nil {
+ in, out := &in.Configuration, &out.Configuration
+ *out = new(runtime.RawExtension)
+ (*in).DeepCopyInto(*out)
+ }
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterCondition.
-func (in *FilterCondition) DeepCopy() *FilterCondition {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DecisionPlugin.
+func (in *DecisionPlugin) DeepCopy() *DecisionPlugin {
if in == nil {
return nil
}
- out := new(FilterCondition)
+ out := new(DecisionPlugin)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *FilterConfigHelper) DeepCopyInto(out *FilterConfigHelper) {
+func (in *DomainSignal) DeepCopyInto(out *DomainSignal) {
*out = *in
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterConfigHelper.
-func (in *FilterConfigHelper) DeepCopy() *FilterConfigHelper {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DomainSignal.
+func (in *DomainSignal) DeepCopy() *DomainSignal {
if in == nil {
return nil
}
- out := new(FilterConfigHelper)
+ out := new(DomainSignal)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Intent) DeepCopyInto(out *Intent) {
+func (in *EmbeddingSignal) DeepCopyInto(out *EmbeddingSignal) {
*out = *in
- if in.Threshold != nil {
- in, out := &in.Threshold, &out.Threshold
- *out = new(float64)
- **out = **in
+ if in.Candidates != nil {
+ in, out := &in.Candidates, &out.Candidates
+ *out = make([]string, len(*in))
+ copy(*out, *in)
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Intent.
-func (in *Intent) DeepCopy() *Intent {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingSignal.
+func (in *EmbeddingSignal) DeepCopy() *EmbeddingSignal {
if in == nil {
return nil
}
- out := new(Intent)
+ out := new(EmbeddingSignal)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ModelRef) DeepCopyInto(out *ModelRef) {
+func (in *IntelligentPool) DeepCopyInto(out *IntelligentPool) {
*out = *in
- if in.Weight != nil {
- in, out := &in.Weight, &out.Weight
- *out = new(int32)
- **out = **in
- }
- if in.Priority != nil {
- in, out := &in.Priority, &out.Priority
- *out = new(int32)
- **out = **in
- }
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRef.
-func (in *ModelRef) DeepCopy() *ModelRef {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentPool.
+func (in *IntelligentPool) DeepCopy() *IntelligentPool {
if in == nil {
return nil
}
- out := new(ModelRef)
+ out := new(IntelligentPool)
in.DeepCopyInto(out)
return out
}
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *IntelligentPool) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PIIDetectionConfig) DeepCopyInto(out *PIIDetectionConfig) {
+func (in *IntelligentPoolList) DeepCopyInto(out *IntelligentPoolList) {
*out = *in
- if in.AllowByDefault != nil {
- in, out := &in.AllowByDefault, &out.AllowByDefault
- *out = new(bool)
- **out = **in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]IntelligentPool, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
- if in.PIITypesAllowed != nil {
- in, out := &in.PIITypesAllowed, &out.PIITypesAllowed
- *out = make([]string, len(*in))
- copy(*out, *in)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentPoolList.
+func (in *IntelligentPoolList) DeepCopy() *IntelligentPoolList {
+ if in == nil {
+ return nil
}
- if in.Threshold != nil {
- in, out := &in.Threshold, &out.Threshold
- *out = new(float64)
- **out = **in
+ out := new(IntelligentPoolList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *IntelligentPoolList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
}
- if in.Action != nil {
- in, out := &in.Action, &out.Action
- *out = new(string)
- **out = **in
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *IntelligentPoolSpec) DeepCopyInto(out *IntelligentPoolSpec) {
+ *out = *in
+ if in.Models != nil {
+ in, out := &in.Models, &out.Models
+ *out = make([]ModelConfig, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PIIDetectionConfig.
-func (in *PIIDetectionConfig) DeepCopy() *PIIDetectionConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentPoolSpec.
+func (in *IntelligentPoolSpec) DeepCopy() *IntelligentPoolSpec {
if in == nil {
return nil
}
- out := new(PIIDetectionConfig)
+ out := new(IntelligentPoolSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PromptGuardConfig) DeepCopyInto(out *PromptGuardConfig) {
+func (in *IntelligentPoolStatus) DeepCopyInto(out *IntelligentPoolStatus) {
*out = *in
- if in.Threshold != nil {
- in, out := &in.Threshold, &out.Threshold
- *out = new(float64)
- **out = **in
- }
- if in.Action != nil {
- in, out := &in.Action, &out.Action
- *out = new(string)
- **out = **in
- }
- if in.CustomRules != nil {
- in, out := &in.CustomRules, &out.CustomRules
- *out = make([]SecurityRule, len(*in))
- copy(*out, *in)
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]v1.Condition, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PromptGuardConfig.
-func (in *PromptGuardConfig) DeepCopy() *PromptGuardConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentPoolStatus.
+func (in *IntelligentPoolStatus) DeepCopy() *IntelligentPoolStatus {
if in == nil {
return nil
}
- out := new(PromptGuardConfig)
+ out := new(IntelligentPoolStatus)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ReasoningControlConfig) DeepCopyInto(out *ReasoningControlConfig) {
+func (in *IntelligentRoute) DeepCopyInto(out *IntelligentRoute) {
*out = *in
- if in.ReasonFamily != nil {
- in, out := &in.ReasonFamily, &out.ReasonFamily
- *out = new(string)
- **out = **in
- }
- if in.EnableReasoning != nil {
- in, out := &in.EnableReasoning, &out.EnableReasoning
- *out = new(bool)
- **out = **in
- }
- if in.ReasoningEffort != nil {
- in, out := &in.ReasoningEffort, &out.ReasoningEffort
- *out = new(string)
- **out = **in
- }
- if in.MaxReasoningSteps != nil {
- in, out := &in.MaxReasoningSteps, &out.MaxReasoningSteps
- *out = new(int32)
- **out = **in
- }
- if in.ReasoningTimeout != nil {
- in, out := &in.ReasoningTimeout, &out.ReasoningTimeout
- *out = new(int32)
- **out = **in
- }
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReasoningControlConfig.
-func (in *ReasoningControlConfig) DeepCopy() *ReasoningControlConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentRoute.
+func (in *IntelligentRoute) DeepCopy() *IntelligentRoute {
if in == nil {
return nil
}
- out := new(ReasoningControlConfig)
+ out := new(IntelligentRoute)
in.DeepCopyInto(out)
return out
}
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *IntelligentRoute) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *RouteRule) DeepCopyInto(out *RouteRule) {
+func (in *IntelligentRouteList) DeepCopyInto(out *IntelligentRouteList) {
*out = *in
- if in.Intents != nil {
- in, out := &in.Intents, &out.Intents
- *out = make([]Intent, len(*in))
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]IntelligentRoute, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
- if in.ModelRefs != nil {
- in, out := &in.ModelRefs, &out.ModelRefs
- *out = make([]ModelRef, len(*in))
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentRouteList.
+func (in *IntelligentRouteList) DeepCopy() *IntelligentRouteList {
+ if in == nil {
+ return nil
+ }
+ out := new(IntelligentRouteList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *IntelligentRouteList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *IntelligentRouteSpec) DeepCopyInto(out *IntelligentRouteSpec) {
+ *out = *in
+ in.Signals.DeepCopyInto(&out.Signals)
+ if in.Decisions != nil {
+ in, out := &in.Decisions, &out.Decisions
+ *out = make([]Decision, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
- if in.Filters != nil {
- in, out := &in.Filters, &out.Filters
- *out = make([]Filter, len(*in))
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentRouteSpec.
+func (in *IntelligentRouteSpec) DeepCopy() *IntelligentRouteSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(IntelligentRouteSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *IntelligentRouteStatus) DeepCopyInto(out *IntelligentRouteStatus) {
+ *out = *in
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]v1.Condition, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
- if in.DefaultModel != nil {
- in, out := &in.DefaultModel, &out.DefaultModel
- *out = new(ModelRef)
- (*in).DeepCopyInto(*out)
+ if in.Statistics != nil {
+ in, out := &in.Statistics, &out.Statistics
+ *out = new(RouteStatistics)
+ **out = **in
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RouteRule.
-func (in *RouteRule) DeepCopy() *RouteRule {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntelligentRouteStatus.
+func (in *IntelligentRouteStatus) DeepCopy() *IntelligentRouteStatus {
if in == nil {
return nil
}
- out := new(RouteRule)
+ out := new(IntelligentRouteStatus)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SecurityRule) DeepCopyInto(out *SecurityRule) {
+func (in *KeywordSignal) DeepCopyInto(out *KeywordSignal) {
*out = *in
+ if in.Keywords != nil {
+ in, out := &in.Keywords, &out.Keywords
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SecurityRule.
-func (in *SecurityRule) DeepCopy() *SecurityRule {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KeywordSignal.
+func (in *KeywordSignal) DeepCopy() *KeywordSignal {
if in == nil {
return nil
}
- out := new(SecurityRule)
+ out := new(KeywordSignal)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SemanticCacheConfig) DeepCopyInto(out *SemanticCacheConfig) {
+func (in *LoRAConfig) DeepCopyInto(out *LoRAConfig) {
*out = *in
- if in.SimilarityThreshold != nil {
- in, out := &in.SimilarityThreshold, &out.SimilarityThreshold
- *out = new(float64)
- **out = **in
- }
- if in.MaxEntries != nil {
- in, out := &in.MaxEntries, &out.MaxEntries
- *out = new(int32)
- **out = **in
- }
- if in.TTLSeconds != nil {
- in, out := &in.TTLSeconds, &out.TTLSeconds
- *out = new(int32)
- **out = **in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoRAConfig.
+func (in *LoRAConfig) DeepCopy() *LoRAConfig {
+ if in == nil {
+ return nil
}
- if in.Backend != nil {
- in, out := &in.Backend, &out.Backend
- *out = new(string)
+ out := new(LoRAConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelConfig) DeepCopyInto(out *ModelConfig) {
+ *out = *in
+ if in.Pricing != nil {
+ in, out := &in.Pricing, &out.Pricing
+ *out = new(ModelPricing)
**out = **in
}
- if in.BackendConfig != nil {
- in, out := &in.BackendConfig, &out.BackendConfig
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
+ if in.LoRAs != nil {
+ in, out := &in.LoRAs, &out.LoRAs
+ *out = make([]LoRAConfig, len(*in))
+ copy(*out, *in)
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticCacheConfig.
-func (in *SemanticCacheConfig) DeepCopy() *SemanticCacheConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelConfig.
+func (in *ModelConfig) DeepCopy() *ModelConfig {
if in == nil {
return nil
}
- out := new(SemanticCacheConfig)
+ out := new(ModelConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SemanticRoute) DeepCopyInto(out *SemanticRoute) {
+func (in *ModelPricing) DeepCopyInto(out *ModelPricing) {
*out = *in
- out.TypeMeta = in.TypeMeta
- in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
- in.Spec.DeepCopyInto(&out.Spec)
- in.Status.DeepCopyInto(&out.Status)
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRoute.
-func (in *SemanticRoute) DeepCopy() *SemanticRoute {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelPricing.
+func (in *ModelPricing) DeepCopy() *ModelPricing {
if in == nil {
return nil
}
- out := new(SemanticRoute)
+ out := new(ModelPricing)
in.DeepCopyInto(out)
return out
}
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *SemanticRoute) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
- }
- return nil
-}
-
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SemanticRouteList) DeepCopyInto(out *SemanticRouteList) {
+func (in *ModelRef) DeepCopyInto(out *ModelRef) {
*out = *in
- out.TypeMeta = in.TypeMeta
- in.ListMeta.DeepCopyInto(&out.ListMeta)
- if in.Items != nil {
- in, out := &in.Items, &out.Items
- *out = make([]SemanticRoute, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteList.
-func (in *SemanticRouteList) DeepCopy() *SemanticRouteList {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRef.
+func (in *ModelRef) DeepCopy() *ModelRef {
if in == nil {
return nil
}
- out := new(SemanticRouteList)
+ out := new(ModelRef)
in.DeepCopyInto(out)
return out
}
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *SemanticRouteList) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelScore) DeepCopyInto(out *ModelScore) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelScore.
+func (in *ModelScore) DeepCopy() *ModelScore {
+ if in == nil {
+ return nil
}
- return nil
+ out := new(ModelScore)
+ in.DeepCopyInto(out)
+ return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SemanticRouteSpec) DeepCopyInto(out *SemanticRouteSpec) {
+func (in *RouteStatistics) DeepCopyInto(out *RouteStatistics) {
*out = *in
- if in.Rules != nil {
- in, out := &in.Rules, &out.Rules
- *out = make([]RouteRule, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteSpec.
-func (in *SemanticRouteSpec) DeepCopy() *SemanticRouteSpec {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RouteStatistics.
+func (in *RouteStatistics) DeepCopy() *RouteStatistics {
if in == nil {
return nil
}
- out := new(SemanticRouteSpec)
+ out := new(RouteStatistics)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SemanticRouteStatus) DeepCopyInto(out *SemanticRouteStatus) {
+func (in *SignalCombination) DeepCopyInto(out *SignalCombination) {
*out = *in
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
- *out = make([]v1.Condition, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
+ *out = make([]SignalCondition, len(*in))
+ copy(*out, *in)
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteStatus.
-func (in *SemanticRouteStatus) DeepCopy() *SemanticRouteStatus {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SignalCombination.
+func (in *SignalCombination) DeepCopy() *SignalCombination {
if in == nil {
return nil
}
- out := new(SemanticRouteStatus)
+ out := new(SignalCombination)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ToolSelectionConfig) DeepCopyInto(out *ToolSelectionConfig) {
+func (in *SignalCondition) DeepCopyInto(out *SignalCondition) {
*out = *in
- if in.TopK != nil {
- in, out := &in.TopK, &out.TopK
- *out = new(int32)
- **out = **in
- }
- if in.SimilarityThreshold != nil {
- in, out := &in.SimilarityThreshold, &out.SimilarityThreshold
- *out = new(float64)
- **out = **in
- }
- if in.ToolsDBPath != nil {
- in, out := &in.ToolsDBPath, &out.ToolsDBPath
- *out = new(string)
- **out = **in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SignalCondition.
+func (in *SignalCondition) DeepCopy() *SignalCondition {
+ if in == nil {
+ return nil
}
- if in.FallbackToEmpty != nil {
- in, out := &in.FallbackToEmpty, &out.FallbackToEmpty
- *out = new(bool)
- **out = **in
+ out := new(SignalCondition)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Signals) DeepCopyInto(out *Signals) {
+ *out = *in
+ if in.Keywords != nil {
+ in, out := &in.Keywords, &out.Keywords
+ *out = make([]KeywordSignal, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
- if in.Categories != nil {
- in, out := &in.Categories, &out.Categories
- *out = make([]string, len(*in))
- copy(*out, *in)
+ if in.Embeddings != nil {
+ in, out := &in.Embeddings, &out.Embeddings
+ *out = make([]EmbeddingSignal, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
- if in.Tags != nil {
- in, out := &in.Tags, &out.Tags
- *out = make([]string, len(*in))
+ if in.Domains != nil {
+ in, out := &in.Domains, &out.Domains
+ *out = make([]DomainSignal, len(*in))
copy(*out, *in)
}
}
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolSelectionConfig.
-func (in *ToolSelectionConfig) DeepCopy() *ToolSelectionConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Signals.
+func (in *Signals) DeepCopy() *Signals {
if in == nil {
return nil
}
- out := new(ToolSelectionConfig)
+ out := new(Signals)
in.DeepCopyInto(out)
return out
}
diff --git a/src/semantic-router/pkg/apiserver/route_system_prompt.go b/src/semantic-router/pkg/apiserver/route_system_prompt.go
index 0aeb7222a..6a060d0d2 100644
--- a/src/semantic-router/pkg/apiserver/route_system_prompt.go
+++ b/src/semantic-router/pkg/apiserver/route_system_prompt.go
@@ -38,12 +38,17 @@ func (s *ClassificationAPIServer) handleGetSystemPrompts(w http.ResponseWriter,
}
var systemPrompts []SystemPromptInfo
- for _, category := range cfg.Categories {
+ for _, decision := range cfg.Decisions {
+ systemPromptConfig := decision.GetSystemPromptConfig()
+ prompt := ""
+ if systemPromptConfig != nil {
+ prompt = systemPromptConfig.SystemPrompt
+ }
systemPrompts = append(systemPrompts, SystemPromptInfo{
- Category: category.Name,
- Prompt: category.SystemPrompt,
- Enabled: category.IsSystemPromptEnabled(),
- Mode: category.GetSystemPromptMode(),
+ Category: decision.Name,
+ Prompt: prompt,
+ Enabled: decision.IsSystemPromptEnabled(),
+ Mode: decision.GetSystemPromptMode(),
})
}
@@ -85,50 +90,78 @@ func (s *ClassificationAPIServer) handleUpdateSystemPrompts(w http.ResponseWrite
// Create a copy of the config to modify
newCfg := *cfg
- newCategories := make([]config.Category, len(cfg.Categories))
- copy(newCategories, cfg.Categories)
- newCfg.Categories = newCategories
+ newDecisions := make([]config.Decision, len(cfg.Decisions))
+ copy(newDecisions, cfg.Decisions)
+ newCfg.Decisions = newDecisions
updated := false
if req.Category == "" {
- // Update all categories
- for i := range newCfg.Categories {
- if newCfg.Categories[i].SystemPrompt != "" {
- if req.Enabled != nil {
- newCfg.Categories[i].SystemPromptEnabled = req.Enabled
- }
- if req.Mode != "" {
- newCfg.Categories[i].SystemPromptMode = req.Mode
+ // Update all decisions
+ for i := range newCfg.Decisions {
+ systemPromptConfig := newCfg.Decisions[i].GetSystemPromptConfig()
+ if systemPromptConfig != nil && systemPromptConfig.SystemPrompt != "" {
+ // Update the plugin configuration
+ for j := range newCfg.Decisions[i].Plugins {
+ if newCfg.Decisions[i].Plugins[j].Type == "system_prompt" {
+ // Convert Configuration to map[string]interface{}
+ configMap, ok := newCfg.Decisions[i].Plugins[j].Configuration.(map[string]interface{})
+ if !ok {
+ // If not a map, create a new one
+ configMap = make(map[string]interface{})
+ }
+ if req.Enabled != nil {
+ configMap["enabled"] = *req.Enabled
+ }
+ if req.Mode != "" {
+ configMap["mode"] = req.Mode
+ }
+ newCfg.Decisions[i].Plugins[j].Configuration = configMap
+ updated = true
+ break
+ }
}
- updated = true
}
}
} else {
- // Update specific category
- for i := range newCfg.Categories {
- if newCfg.Categories[i].Name == req.Category {
- if newCfg.Categories[i].SystemPrompt == "" {
- http.Error(w, fmt.Sprintf("Category '%s' has no system prompt configured", req.Category), http.StatusBadRequest)
+ // Update specific decision
+ for i := range newCfg.Decisions {
+ if newCfg.Decisions[i].Name == req.Category {
+ systemPromptConfig := newCfg.Decisions[i].GetSystemPromptConfig()
+ if systemPromptConfig == nil || systemPromptConfig.SystemPrompt == "" {
+ http.Error(w, fmt.Sprintf("Decision '%s' has no system prompt configured", req.Category), http.StatusBadRequest)
return
}
- if req.Enabled != nil {
- newCfg.Categories[i].SystemPromptEnabled = req.Enabled
+ // Update the plugin configuration
+ for j := range newCfg.Decisions[i].Plugins {
+ if newCfg.Decisions[i].Plugins[j].Type == "system_prompt" {
+ // Convert Configuration to map[string]interface{}
+ configMap, ok := newCfg.Decisions[i].Plugins[j].Configuration.(map[string]interface{})
+ if !ok {
+ // If not a map, create a new one
+ configMap = make(map[string]interface{})
+ }
+ if req.Enabled != nil {
+ configMap["enabled"] = *req.Enabled
+ }
+ if req.Mode != "" {
+ configMap["mode"] = req.Mode
+ }
+ newCfg.Decisions[i].Plugins[j].Configuration = configMap
+ updated = true
+ break
+ }
}
- if req.Mode != "" {
- newCfg.Categories[i].SystemPromptMode = req.Mode
- }
- updated = true
break
}
}
if !updated {
- http.Error(w, fmt.Sprintf("Category '%s' not found", req.Category), http.StatusNotFound)
+ http.Error(w, fmt.Sprintf("Decision '%s' not found", req.Category), http.StatusNotFound)
return
}
}
if !updated {
- http.Error(w, "No categories with system prompts found to update", http.StatusBadRequest)
+ http.Error(w, "No decisions with system prompts found to update", http.StatusBadRequest)
return
}
@@ -136,14 +169,19 @@ func (s *ClassificationAPIServer) handleUpdateSystemPrompts(w http.ResponseWrite
s.config = &newCfg
s.classificationSvc.UpdateConfig(&newCfg)
- // Return the updated system prompts
+ // Return the updated system prompts from decisions
var systemPrompts []SystemPromptInfo
- for _, category := range newCfg.Categories {
+ for _, decision := range newCfg.Decisions {
+ systemPromptConfig := decision.GetSystemPromptConfig()
+ prompt := ""
+ if systemPromptConfig != nil {
+ prompt = systemPromptConfig.SystemPrompt
+ }
systemPrompts = append(systemPrompts, SystemPromptInfo{
- Category: category.Name,
- Prompt: category.SystemPrompt,
- Enabled: category.IsSystemPromptEnabled(),
- Mode: category.GetSystemPromptMode(),
+ Category: decision.Name,
+ Prompt: prompt,
+ Enabled: decision.IsSystemPromptEnabled(),
+ Mode: decision.GetSystemPromptMode(),
})
}
diff --git a/src/semantic-router/pkg/apiserver/server.go b/src/semantic-router/pkg/apiserver/server.go
index cda5a8402..86b4a348c 100644
--- a/src/semantic-router/pkg/apiserver/server.go
+++ b/src/semantic-router/pkg/apiserver/server.go
@@ -17,10 +17,11 @@ import (
// Init starts the API server
func Init(configPath string, port int, enableSystemPromptAPI bool) error {
- // Load configuration
- cfg, err := config.Load(configPath)
- if err != nil {
- return fmt.Errorf("failed to load config: %w", err)
+ // Get the global configuration instead of loading from file
+ // This ensures we use the same config as the rest of the application
+ cfg := config.Get()
+ if cfg == nil {
+ return fmt.Errorf("configuration not initialized")
}
// Create classification service - try to get global service with retry
@@ -39,7 +40,7 @@ func Init(configPath string, port int, enableSystemPromptAPI bool) error {
}
// Initialize batch metrics configuration
- if cfg != nil && cfg.API.BatchClassification.Metrics.Enabled {
+ if cfg.API.BatchClassification.Metrics.Enabled {
metricsConfig := metrics.BatchMetricsConfig{
Enabled: cfg.API.BatchClassification.Metrics.Enabled,
DetailedGoroutineTracking: cfg.API.BatchClassification.Metrics.DetailedGoroutineTracking,
diff --git a/src/semantic-router/pkg/apiserver/server_test.go b/src/semantic-router/pkg/apiserver/server_test.go
index d98040fb9..ea52ace8f 100644
--- a/src/semantic-router/pkg/apiserver/server_test.go
+++ b/src/semantic-router/pkg/apiserver/server_test.go
@@ -449,387 +449,6 @@ func TestOpenAIModelsEndpointWithConfigModels(t *testing.T) {
}
}
-// TestSystemPromptEndpointSecurity tests that system prompt endpoints are only accessible when explicitly enabled
-func TestSystemPromptEndpointSecurity(t *testing.T) {
- // Create test configuration with categories that have system prompts
- cfg := &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- Categories: []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SystemPromptPolicy: config.SystemPromptPolicy{
- SystemPrompt: "You are a math expert.",
- SystemPromptEnabled: &[]bool{true}[0], // Pointer to true
- SystemPromptMode: "replace",
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "coding",
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SystemPromptPolicy: config.SystemPromptPolicy{
- SystemPrompt: "You are a coding assistant.",
- SystemPromptEnabled: &[]bool{false}[0], // Pointer to false
- SystemPromptMode: "insert",
- },
- },
- },
- },
- },
- }
-
- tests := []struct {
- name string
- enableSystemPromptAPI bool
- method string
- path string
- requestBody string
- expectedStatus int
- description string
- }{
- {
- name: "GET system prompts - disabled API",
- enableSystemPromptAPI: false,
- method: "GET",
- path: "/config/system-prompts",
- expectedStatus: http.StatusNotFound,
- description: "Should return 404 when system prompt API is disabled",
- },
- {
- name: "PUT system prompts - disabled API",
- enableSystemPromptAPI: false,
- method: "PUT",
- path: "/config/system-prompts",
- requestBody: `{"enabled": true}`,
- expectedStatus: http.StatusNotFound,
- description: "Should return 404 when system prompt API is disabled",
- },
- {
- name: "GET system prompts - enabled API",
- enableSystemPromptAPI: true,
- method: "GET",
- path: "/config/system-prompts",
- expectedStatus: http.StatusOK,
- description: "Should return 200 when system prompt API is enabled",
- },
- {
- name: "PUT system prompts - enabled API - valid request",
- enableSystemPromptAPI: true,
- method: "PUT",
- path: "/config/system-prompts",
- requestBody: `{"category": "math", "enabled": false}`,
- expectedStatus: http.StatusOK,
- description: "Should return 200 for valid PUT request when API is enabled",
- },
- {
- name: "PUT system prompts - enabled API - invalid request",
- enableSystemPromptAPI: true,
- method: "PUT",
- path: "/config/system-prompts",
- requestBody: `{"category": "nonexistent"}`,
- expectedStatus: http.StatusBadRequest,
- description: "Should return 400 for invalid PUT request",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- // Create a test server that simulates the behavior
- var mux *http.ServeMux
- if tt.enableSystemPromptAPI {
- // Simulate enabled API - create a server that has the endpoints
- mux = http.NewServeMux()
- mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- mux.HandleFunc("GET /config/classification", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- mux.HandleFunc("PUT /config/classification", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- // Add system prompt endpoints when enabled
- mux.HandleFunc("GET /config/system-prompts", func(w http.ResponseWriter, r *http.Request) {
- // Create a test server instance with config for the handler
- testServerWithConfig := &ClassificationAPIServer{
- classificationSvc: services.NewPlaceholderClassificationService(),
- config: cfg,
- enableSystemPromptAPI: true,
- }
- testServerWithConfig.handleGetSystemPrompts(w, r)
- })
- mux.HandleFunc("PUT /config/system-prompts", func(w http.ResponseWriter, r *http.Request) {
- // Create a test server instance with config for the handler
- testServerWithConfig := &ClassificationAPIServer{
- classificationSvc: services.NewPlaceholderClassificationService(),
- config: cfg,
- enableSystemPromptAPI: true,
- }
- testServerWithConfig.handleUpdateSystemPrompts(w, r)
- })
- } else {
- // Simulate disabled API - create a server without the endpoints
- mux = http.NewServeMux()
- mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- mux.HandleFunc("GET /config/classification", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- mux.HandleFunc("PUT /config/classification", func(w http.ResponseWriter, r *http.Request) {
- w.WriteHeader(http.StatusOK)
- })
- // System prompt endpoints are NOT registered when disabled
- }
-
- // Create request
- var req *http.Request
- if tt.requestBody != "" {
- req = httptest.NewRequest(tt.method, tt.path, bytes.NewBufferString(tt.requestBody))
- req.Header.Set("Content-Type", "application/json")
- } else {
- req = httptest.NewRequest(tt.method, tt.path, nil)
- }
-
- rr := httptest.NewRecorder()
-
- // Serve the request
- mux.ServeHTTP(rr, req)
-
- // Check status code
- if rr.Code != tt.expectedStatus {
- t.Errorf("%s: expected status %d, got %d. Response: %s",
- tt.description, tt.expectedStatus, rr.Code, rr.Body.String())
- }
-
- // Additional checks for specific cases
- if tt.enableSystemPromptAPI && tt.method == "GET" && tt.expectedStatus == http.StatusOK {
- // Verify the response structure for GET requests
- var response SystemPromptsResponse
- if err := json.Unmarshal(rr.Body.Bytes(), &response); err != nil {
- t.Errorf("Failed to unmarshal GET response: %v", err)
- }
-
- // Should have system prompts from config
- if len(response.SystemPrompts) != 2 {
- t.Errorf("Expected 2 system prompts, got %d", len(response.SystemPrompts))
- }
-
- // Verify the content
- foundMath := false
- foundCoding := false
- for _, sp := range response.SystemPrompts {
- if sp.Category == "math" {
- foundMath = true
- if sp.Prompt != "You are a math expert." {
- t.Errorf("Expected math prompt 'You are a math expert.', got '%s'", sp.Prompt)
- }
- if !sp.Enabled {
- t.Errorf("Expected math category to be enabled")
- }
- if sp.Mode != "replace" {
- t.Errorf("Expected math mode 'replace', got '%s'", sp.Mode)
- }
- }
- if sp.Category == "coding" {
- foundCoding = true
- if sp.Enabled {
- t.Errorf("Expected coding category to be disabled")
- }
- if sp.Mode != "insert" {
- t.Errorf("Expected coding mode 'insert', got '%s'", sp.Mode)
- }
- }
- }
-
- if !foundMath || !foundCoding {
- t.Errorf("Expected to find both math and coding categories")
- }
- }
- })
- }
-}
-
-// TestSystemPromptEndpointFunctionality tests the actual functionality of system prompt endpoints
-func TestSystemPromptEndpointFunctionality(t *testing.T) {
- // Create test configuration
- cfg := &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- Categories: []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SystemPromptPolicy: config.SystemPromptPolicy{
- SystemPrompt: "You are a math expert.",
- SystemPromptEnabled: &[]bool{true}[0],
- SystemPromptMode: "replace",
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "no-prompt",
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SystemPromptPolicy: config.SystemPromptPolicy{
- SystemPrompt: "", // No system prompt
- },
- },
- },
- },
- },
- }
-
- // Create a test server with the config for functionality testing
- apiServer := &ClassificationAPIServer{
- classificationSvc: services.NewPlaceholderClassificationService(),
- config: cfg,
- enableSystemPromptAPI: true, // Enable for functionality testing
- }
-
- t.Run("GET system prompts returns correct data", func(t *testing.T) {
- req := httptest.NewRequest("GET", "/config/system-prompts", nil)
- rr := httptest.NewRecorder()
-
- apiServer.handleGetSystemPrompts(rr, req)
-
- if rr.Code != http.StatusOK {
- t.Fatalf("Expected 200, got %d", rr.Code)
- }
-
- var response SystemPromptsResponse
- if err := json.Unmarshal(rr.Body.Bytes(), &response); err != nil {
- t.Fatalf("Failed to unmarshal response: %v", err)
- }
-
- if len(response.SystemPrompts) != 2 {
- t.Errorf("Expected 2 categories, got %d", len(response.SystemPrompts))
- }
- })
-
- t.Run("PUT system prompts - enable specific category", func(t *testing.T) {
- requestBody := `{"category": "math", "enabled": false}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusOK {
- t.Fatalf("Expected 200, got %d. Response: %s", rr.Code, rr.Body.String())
- }
-
- var response SystemPromptsResponse
- if err := json.Unmarshal(rr.Body.Bytes(), &response); err != nil {
- t.Fatalf("Failed to unmarshal response: %v", err)
- }
-
- // Find the math category and verify it's disabled
- for _, sp := range response.SystemPrompts {
- if sp.Category == "math" && sp.Enabled {
- t.Errorf("Expected math category to be disabled after PUT request")
- }
- }
- })
-
- t.Run("PUT system prompts - change mode", func(t *testing.T) {
- requestBody := `{"category": "math", "mode": "insert"}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusOK {
- t.Fatalf("Expected 200, got %d. Response: %s", rr.Code, rr.Body.String())
- }
-
- var response SystemPromptsResponse
- if err := json.Unmarshal(rr.Body.Bytes(), &response); err != nil {
- t.Fatalf("Failed to unmarshal response: %v", err)
- }
-
- // Find the math category and verify mode is changed
- for _, sp := range response.SystemPrompts {
- if sp.Category == "math" && sp.Mode != "insert" {
- t.Errorf("Expected math category mode to be 'insert', got '%s'", sp.Mode)
- }
- }
- })
-
- t.Run("PUT system prompts - update all categories", func(t *testing.T) {
- requestBody := `{"enabled": true}` // No category specified = update all
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusOK {
- t.Fatalf("Expected 200, got %d. Response: %s", rr.Code, rr.Body.String())
- }
- })
-
- t.Run("PUT system prompts - invalid category", func(t *testing.T) {
- requestBody := `{"category": "nonexistent", "enabled": true}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusNotFound {
- t.Errorf("Expected 404 for nonexistent category, got %d", rr.Code)
- }
- })
-
- t.Run("PUT system prompts - category without system prompt", func(t *testing.T) {
- requestBody := `{"category": "no-prompt", "enabled": true}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusBadRequest {
- t.Errorf("Expected 400 for category without system prompt, got %d", rr.Code)
- }
- })
-
- t.Run("PUT system prompts - invalid mode", func(t *testing.T) {
- requestBody := `{"category": "math", "mode": "invalid"}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusBadRequest {
- t.Errorf("Expected 400 for invalid mode, got %d", rr.Code)
- }
- })
-
- t.Run("PUT system prompts - empty request", func(t *testing.T) {
- requestBody := `{}`
- req := httptest.NewRequest("PUT", "/config/system-prompts", bytes.NewBufferString(requestBody))
- req.Header.Set("Content-Type", "application/json")
- rr := httptest.NewRecorder()
-
- apiServer.handleUpdateSystemPrompts(rr, req)
-
- if rr.Code != http.StatusBadRequest {
- t.Errorf("Expected 400 for empty request, got %d", rr.Code)
- }
- })
-}
-
// TestSetupRoutesSecurityBehavior tests that setupRoutes correctly includes/excludes endpoints based on security flag
func TestSetupRoutesSecurityBehavior(t *testing.T) {
tests := []struct {
diff --git a/src/semantic-router/pkg/classification/classifier.go b/src/semantic-router/pkg/classification/classifier.go
index 608132600..9e737a12b 100644
--- a/src/semantic-router/pkg/classification/classifier.go
+++ b/src/semantic-router/pkg/classification/classifier.go
@@ -8,6 +8,7 @@ import (
candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/decision"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/entropy"
@@ -511,6 +512,97 @@ func (c *Classifier) initializePIIClassifier() error {
return c.piiInitializer.Init(c.Config.PIIModel.ModelID, c.Config.PIIModel.UseCPU)
}
+// EvaluateAllRules evaluates all rule types and returns matched rule names
+// Returns (matchedKeywordRules, matchedEmbeddingRules, matchedDomainRules, error)
+// matchedKeywordRules: list of matched keyword rule category names
+// matchedEmbeddingRules: list of matched embedding rule category names
+// matchedDomainRules: list of matched domain rule category names (from category classification)
+func (c *Classifier) EvaluateAllRules(text string) ([]string, []string, []string, error) {
+ var matchedKeywordRules []string
+ var matchedEmbeddingRules []string
+ var matchedDomainRules []string
+
+ // Evaluate keyword rules - check each rule individually
+ if c.keywordClassifier != nil {
+ category, _, err := c.keywordClassifier.Classify(text)
+ if err != nil {
+ return nil, nil, nil, fmt.Errorf("keyword rule evaluation failed: %w", err)
+ }
+ if category != "" {
+ matchedKeywordRules = append(matchedKeywordRules, category)
+ }
+ }
+
+ // Evaluate embedding rules - check each rule individually
+ if c.keywordEmbeddingClassifier != nil {
+ category, _, err := c.keywordEmbeddingClassifier.Classify(text)
+ if err != nil {
+ return nil, nil, nil, fmt.Errorf("embedding rule evaluation failed: %w", err)
+ }
+ if category != "" {
+ matchedEmbeddingRules = append(matchedEmbeddingRules, category)
+ }
+ }
+
+ // Evaluate domain rules (category classification)
+ if c.IsCategoryEnabled() && c.categoryInference != nil && c.CategoryMapping != nil {
+ result, err := c.categoryInference.Classify(text)
+ if err != nil {
+ return nil, nil, nil, fmt.Errorf("domain rule evaluation failed: %w", err)
+ }
+ // Map class index to category name
+ if categoryName, ok := c.CategoryMapping.GetCategoryFromIndex(result.Class); ok {
+ if categoryName != "" {
+ matchedDomainRules = append(matchedDomainRules, categoryName)
+ }
+ }
+ }
+
+ return matchedKeywordRules, matchedEmbeddingRules, matchedDomainRules, nil
+}
+
+// EvaluateDecisionWithEngine evaluates all decisions using the DecisionEngine
+// Returns the best matching decision based on the configured strategy
+func (c *Classifier) EvaluateDecisionWithEngine(text string) (*decision.DecisionResult, error) {
+ // Check if decisions are configured
+ if len(c.Config.Decisions) == 0 {
+ return nil, fmt.Errorf("no decisions configured")
+ }
+
+ // Evaluate all rules
+ matchedKeywordRules, matchedEmbeddingRules, matchedDomainRules, err := c.EvaluateAllRules(text)
+ if err != nil {
+ return nil, fmt.Errorf("failed to evaluate rules: %w", err)
+ }
+
+ logging.Infof("Rule evaluation results: keyword=%v, embedding=%v, domain=%v",
+ matchedKeywordRules, matchedEmbeddingRules, matchedDomainRules)
+
+ // Create decision engine
+ engine := decision.NewDecisionEngine(
+ c.Config.KeywordRules,
+ c.Config.EmbeddingRules,
+ c.Config.Categories,
+ c.Config.Decisions,
+ c.Config.Strategy,
+ )
+
+ // Evaluate decisions
+ result, err := engine.EvaluateDecisions(
+ matchedKeywordRules,
+ matchedEmbeddingRules,
+ matchedDomainRules,
+ )
+ if err != nil {
+ return nil, fmt.Errorf("decision evaluation failed: %w", err)
+ }
+
+ logging.Infof("Decision evaluation result: decision=%s, confidence=%.3f, matched_rules=%v",
+ result.Decision.Name, result.Confidence, result.MatchedRules)
+
+ return result, nil
+}
+
// ClassifyCategoryWithEntropy performs category classification with entropy-based reasoning decision
func (c *Classifier) ClassifyCategoryWithEntropy(text string) (string, float64, entropy.ReasoningDecision, error) {
// Try keyword classifier first
@@ -559,15 +651,15 @@ func (c *Classifier) ClassifyCategoryWithEntropy(text string) (string, float64,
// makeReasoningDecisionForKeywordCategory creates a reasoning decision for keyword-matched categories
func (c *Classifier) makeReasoningDecisionForKeywordCategory(category string) entropy.ReasoningDecision {
- // Find the category configuration
+ // Find the decision configuration
normalizedCategory := strings.ToLower(strings.TrimSpace(category))
useReasoning := false
- for _, cat := range c.Config.Categories {
- if strings.ToLower(cat.Name) == normalizedCategory {
- // Check if the category has reasoning enabled in its best model
- if len(cat.ModelScores) > 0 && cat.ModelScores[0].UseReasoning != nil {
- useReasoning = *cat.ModelScores[0].UseReasoning
+ for _, decision := range c.Config.Decisions {
+ if strings.ToLower(decision.Name) == normalizedCategory {
+ // Check if the decision has reasoning enabled in its best model
+ if len(decision.ModelRefs) > 0 && decision.ModelRefs[0].UseReasoning != nil {
+ useReasoning = *decision.ModelRefs[0].UseReasoning
}
break
}
@@ -618,16 +710,16 @@ func (c *Classifier) classifyCategoryWithEntropyInTree(text string) (string, flo
}
}
- // Build category reasoning map from configuration
- // Use the best model's reasoning capability for each category
+ // Build decision reasoning map from configuration
+ // Use the best model's reasoning capability for each decision
categoryReasoningMap := make(map[string]bool)
- for _, category := range c.Config.Categories {
+ for _, decision := range c.Config.Decisions {
useReasoning := false
- if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
+ if len(decision.ModelRefs) > 0 && decision.ModelRefs[0].UseReasoning != nil {
// Use the first (best) model's reasoning capability
- useReasoning = *category.ModelScores[0].UseReasoning
+ useReasoning = *decision.ModelRefs[0].UseReasoning
}
- categoryReasoningMap[strings.ToLower(category.Name)] = useReasoning
+ categoryReasoningMap[strings.ToLower(decision.Name)] = useReasoning
}
// Make entropy-based reasoning decision
@@ -854,39 +946,38 @@ func (c *Classifier) AnalyzeContentForPIIWithThreshold(contentList []string, thr
return hasPII, analysisResults, nil
}
-// SelectBestModelForCategory selects the best model from a category based on score and TTFT
+// SelectBestModelForCategory selects the best model from a decision (the first ModelRef in the single-model-per-decision architecture)
func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
- cat := c.findCategory(categoryName)
- if cat == nil {
- logging.Warnf("Could not find matching category %s in config, using default model", categoryName)
+ decision := c.findDecision(categoryName)
+ if decision == nil {
+ logging.Warnf("Could not find matching decision %s in config, using default model", categoryName)
return c.Config.DefaultModel
}
- bestModel, bestScore := c.selectBestModelInternal(cat, nil)
+ bestModel, bestScore := c.selectBestModelInternalForDecision(decision, nil)
if bestModel == "" {
- logging.Warnf("No models found for category %s, using default model", categoryName)
+ logging.Warnf("No models found for decision %s, using default model", categoryName)
return c.Config.DefaultModel
}
- logging.Infof("Selected model %s for category %s with score %.4f", bestModel, categoryName, bestScore)
+ logging.Infof("Selected model %s for decision %s with score %.4f", bestModel, categoryName, bestScore)
return bestModel
}
-// findCategory finds the category configuration by name (case-insensitive)
-func (c *Classifier) findCategory(categoryName string) *config.Category {
- for i, category := range c.Config.Categories {
- if strings.EqualFold(category.Name, categoryName) {
- return &c.Config.Categories[i]
+// findDecision finds the decision configuration by name (case-insensitive)
+func (c *Classifier) findDecision(decisionName string) *config.Decision {
+ for i, decision := range c.Config.Decisions {
+ if strings.EqualFold(decision.Name, decisionName) {
+ return &c.Config.Decisions[i]
}
}
return nil
}
-// GetCategoryByName returns the category configuration by name (case-insensitive)
-// This is a public method that can be used by other packages to get category information
-func (c *Classifier) GetCategoryByName(categoryName string) *config.Category {
- return c.findCategory(categoryName)
+// GetDecisionByName returns the decision configuration by name (case-insensitive)
+func (c *Classifier) GetDecisionByName(decisionName string) *config.Decision {
+ return c.findDecision(decisionName)
}
// GetCategorySystemPrompt returns the system prompt for a specific category if available.
@@ -955,78 +1046,70 @@ func (c *Classifier) translateMMLUToGeneric(mmluCategory string) string {
return mmluCategory
}
-// selectBestModelInternal performs the core model selection logic
+// selectBestModelInternalForDecision performs the core model selection logic for decisions
//
// modelFilter is optional - if provided, only models passing the filter will be considered
-func (c *Classifier) selectBestModelInternal(cat *config.Category, modelFilter func(string) bool) (string, float64) {
+func (c *Classifier) selectBestModelInternalForDecision(decision *config.Decision, modelFilter func(string) bool) (string, float64) {
bestModel := ""
- bestScore := -1.0
- c.forEachModelScore(cat, func(modelScore config.ModelScore) {
- model := modelScore.Model
- if modelFilter != nil && !modelFilter(model) {
- return
- }
- // Use LoRA name if specified, otherwise use the base model name
- // This enables intent-aware LoRA routing where the final model name
- // in the request becomes the LoRA adapter name
- finalModelName := model
- if modelScore.LoRAName != "" {
- finalModelName = modelScore.LoRAName
- logging.Debugf("Using LoRA adapter '%s' for base model '%s'", finalModelName, model)
+	// With the new architecture, only the first ModelRef of a decision is considered.
+ if len(decision.ModelRefs) > 0 {
+ modelRef := decision.ModelRefs[0]
+ model := modelRef.Model
+
+ if modelFilter == nil || modelFilter(model) {
+ // Use LoRA name if specified, otherwise use the base model name
+ finalModelName := model
+ if modelRef.LoRAName != "" {
+ finalModelName = modelRef.LoRAName
+ logging.Debugf("Using LoRA adapter '%s' for base model '%s'", finalModelName, model)
+ }
+ bestModel = finalModelName
}
- c.updateBestModel(modelScore.Score, finalModelName, &bestScore, &bestModel)
- })
-
- return bestModel, bestScore
-}
-
-// forEachModelScore traverses the ModelScores document of the category and executes the callback for each element.
-func (c *Classifier) forEachModelScore(cat *config.Category, fn func(modelScore config.ModelScore)) {
- for _, modelScore := range cat.ModelScores {
- fn(modelScore)
}
+
+	return bestModel, 1.0 // Per-model scores no longer exist; callers only check for an empty model name.
}
-// SelectBestModelFromList selects the best model from a list of candidate models for a given category
+// SelectBestModelFromList selects the best model from a list of candidate models for a given decision
func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryName string) string {
if len(candidateModels) == 0 {
return c.Config.DefaultModel
}
- cat := c.findCategory(categoryName)
- if cat == nil {
- // Return first candidate if category not found
+ decision := c.findDecision(categoryName)
+ if decision == nil {
+ // Return first candidate if decision not found
return candidateModels[0]
}
- bestModel, bestScore := c.selectBestModelInternal(cat,
+ bestModel, bestScore := c.selectBestModelInternalForDecision(decision,
func(model string) bool {
return slices.Contains(candidateModels, model)
})
if bestModel == "" {
- logging.Warnf("No suitable model found from candidates for category %s, using first candidate", categoryName)
+ logging.Warnf("No suitable model found from candidates for decision %s, using first candidate", categoryName)
return candidateModels[0]
}
- logging.Infof("Selected best model %s for category %s with score %.4f", bestModel, categoryName, bestScore)
+ logging.Infof("Selected best model %s for decision %s with score %.4f", bestModel, categoryName, bestScore)
return bestModel
}
-// GetModelsForCategory returns all models that are configured for the given category
-// If a ModelScore has a LoRAName specified, the LoRA name is returned instead of the base model name
+// GetModelsForCategory returns all models that are configured for the given decision
+// If a ModelRef has a LoRAName specified, the LoRA name is returned instead of the base model name
func (c *Classifier) GetModelsForCategory(categoryName string) []string {
var models []string
- for _, category := range c.Config.Categories {
- if strings.EqualFold(category.Name, categoryName) {
- for _, modelScore := range category.ModelScores {
+ for _, decision := range c.Config.Decisions {
+ if strings.EqualFold(decision.Name, categoryName) {
+ for _, modelRef := range decision.ModelRefs {
// Use LoRA name if specified, otherwise use the base model name
- if modelScore.LoRAName != "" {
- models = append(models, modelScore.LoRAName)
+ if modelRef.LoRAName != "" {
+ models = append(models, modelRef.LoRAName)
} else {
- models = append(models, modelScore.Model)
+ models = append(models, modelRef.Model)
}
}
break
diff --git a/src/semantic-router/pkg/classification/classifier_test.go b/src/semantic-router/pkg/classification/classifier_test.go
index 33a699946..42f0f3320 100644
--- a/src/semantic-router/pkg/classification/classifier_test.go
+++ b/src/semantic-router/pkg/classification/classifier_test.go
@@ -14,7 +14,6 @@ import (
"github.com/mark3labs/mcp-go/mcp"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
- "github.com/samber/lo"
candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
@@ -49,438 +48,6 @@ func (m *MockCategoryInitializer) Init(_ string, useCPU bool, numClasses ...int)
return m.InitError
}
-var _ = Describe("category classification and model selection", func() {
- var (
- classifier *Classifier
- mockCategoryInitializer *MockCategoryInitializer
- mockCategoryModel *MockCategoryInference
- )
-
- BeforeEach(func() {
- mockCategoryInitializer = &MockCategoryInitializer{InitError: nil}
- mockCategoryModel = &MockCategoryInference{}
- cfg := &config.RouterConfig{}
- cfg.CategoryModel.ModelID = "model-id"
- cfg.CategoryMappingPath = "category-mapping-path"
- cfg.CategoryModel.Threshold = 0.5
- classifier, _ = newClassifierWithOptions(cfg,
- withCategory(&CategoryMapping{
- CategoryToIdx: map[string]int{"technology": 0, "sports": 1, "politics": 2},
- IdxToCategory: map[string]string{"0": "technology", "1": "sports", "2": "politics"},
- }, mockCategoryInitializer, mockCategoryModel),
- )
- })
-
- Describe("initialize category classifier", func() {
- It("should succeed", func() {
- err := classifier.initializeCategoryClassifier()
- Expect(err).ToNot(HaveOccurred())
- })
-
- Context("when category mapping is not initialized", func() {
- It("should return error", func() {
- classifier.CategoryMapping = nil
- err := classifier.initializeCategoryClassifier()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("category classification is not properly configured"))
- })
- })
-
- Context("when not enough categories", func() {
- It("should return error", func() {
- classifier.CategoryMapping = &CategoryMapping{
- CategoryToIdx: map[string]int{"technology": 0},
- IdxToCategory: map[string]string{"0": "technology"},
- }
- err := classifier.initializeCategoryClassifier()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("not enough categories for classification"))
- })
- })
-
- Context("when initialize category classifier fails", func() {
- It("should return error", func() {
- mockCategoryInitializer.InitError = errors.New("initialize category classifier failed")
- err := classifier.initializeCategoryClassifier()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("initialize category classifier failed"))
- })
- })
- })
-
- Describe("classify category with entropy", func() {
- type row struct {
- ModelID string
- CategoryMappingPath string
- CategoryMapping *CategoryMapping
- }
-
- DescribeTable("when category classification is not properly configured",
- func(r row) {
- classifier.Config.CategoryModel.ModelID = r.ModelID
- classifier.Config.CategoryMappingPath = r.CategoryMappingPath
- classifier.CategoryMapping = r.CategoryMapping
- _, _, _, err := classifier.ClassifyCategoryWithEntropy("Some text")
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("category classification is not properly configured"))
- },
- Entry("ModelID is empty", row{ModelID: ""}),
- Entry("CategoryMappingPath is empty", row{CategoryMappingPath: ""}),
- Entry("CategoryMapping is nil", row{CategoryMapping: nil}),
- )
-
- Context("when classification succeeds with high confidence", func() {
- It("should return the correct category", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 2,
- Confidence: 0.95,
- Probabilities: []float32{0.02, 0.03, 0.95},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("This is about politics")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("politics"))
- Expect(score).To(BeNumerically("~", 0.95, 0.001))
- })
- })
-
- Context("when classification confidence is below threshold", func() {
- It("should return empty category", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 0,
- Confidence: 0.3,
- Probabilities: []float32{0.3, 0.35, 0.35},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("Ambiguous text")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal(""))
- Expect(score).To(BeNumerically("~", 0.3, 0.001))
- })
- })
-
- Context("when model inference fails", func() {
- It("should return empty category with zero score", func() {
- mockCategoryModel.classifyWithProbsError = errors.New("model inference failed")
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("Some text")
-
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("classification error"))
- Expect(category).To(Equal(""))
- Expect(score).To(BeNumerically("~", 0.0, 0.001))
- })
- })
-
- Context("when input is empty or invalid", func() {
- It("should handle empty text gracefully", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 0,
- Confidence: 0.8,
- Probabilities: []float32{0.8, 0.1, 0.1},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("technology"))
- Expect(score).To(BeNumerically("~", 0.8, 0.001))
- })
- })
-
- Context("when class index is not found in category mapping", func() {
- It("should handle invalid category mapping gracefully", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 9,
- Confidence: 0.8,
- Probabilities: []float32{0.1, 0.1, 0.0},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("Some text")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal(""))
- Expect(score).To(BeNumerically("~", 0.8, 0.001))
- })
- })
- })
-
- Describe("category classification with entropy", func() {
- Context("when category mapping is not initialized", func() {
- It("should return error", func() {
- classifier.CategoryMapping = nil
- _, _, _, err := classifier.ClassifyCategoryWithEntropy("Some text")
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("category classification is not properly configured"))
- })
- })
-
- Context("when classification succeeds with probabilities", func() {
- It("should return category and entropy decision", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 2,
- Confidence: 0.95,
- Probabilities: []float32{0.02, 0.03, 0.95},
- NumClasses: 3,
- }
-
- // Add UseReasoning configuration for the categories
- classifier.Config.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{Name: "technology"},
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(false)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "sports"},
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(false)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "politics"},
- ModelScores: []config.ModelScore{{
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(true)},
- }},
- },
- }
-
- category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("This is about politics")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("politics"))
- Expect(confidence).To(BeNumerically("~", 0.95, 0.001))
- Expect(reasoningDecision.UseReasoning).To(BeTrue()) // Politics uses reasoning
- Expect(len(reasoningDecision.TopCategories)).To(BeNumerically(">", 0))
- })
- })
-
- Context("when classification confidence is below threshold", func() {
- It("should return empty category but still provide entropy decision", func() {
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 0,
- Confidence: 0.3,
- Probabilities: []float32{0.3, 0.35, 0.35},
- NumClasses: 3,
- }
-
- classifier.Config.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{Name: "technology"},
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(false)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "sports"},
- ModelScores: []config.ModelScore{{
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(true)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "politics"},
- ModelScores: []config.ModelScore{{
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(true)},
- }},
- },
- }
-
- category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("Ambiguous text")
-
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal(""))
- Expect(confidence).To(BeNumerically("~", 0.3, 0.001))
- Expect(len(reasoningDecision.TopCategories)).To(BeNumerically(">", 0))
- })
- })
-
- Context("when model inference fails", func() {
- It("should return error", func() {
- mockCategoryModel.classifyWithProbsError = errors.New("model inference failed")
-
- category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("Some text")
-
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("classification error"))
- Expect(category).To(Equal(""))
- Expect(confidence).To(BeNumerically("~", 0.0, 0.001))
- Expect(reasoningDecision.UseReasoning).To(BeFalse())
- })
- })
- })
-
- BeforeEach(func() {
- classifier.Config.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{Name: "technology"},
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "sports"},
- ModelScores: []config.ModelScore{},
- },
- }
- classifier.Config.DefaultModel = "default-model"
- })
-
- Describe("select best model for category", func() {
- It("should return the best model", func() {
- model := classifier.SelectBestModelForCategory("technology")
- Expect(model).To(Equal("model-a"))
- })
-
- Context("when category is not found", func() {
- It("should return the default model", func() {
- model := classifier.SelectBestModelForCategory("non-existent-category")
- Expect(model).To(Equal("default-model"))
- })
- })
-
- Context("when no best model is found", func() {
- It("should return the default model", func() {
- model := classifier.SelectBestModelForCategory("sports")
- Expect(model).To(Equal("default-model"))
- })
- })
- })
-
- Describe("select best model from list", func() {
- It("should return the best model", func() {
- model := classifier.SelectBestModelFromList([]string{"model-a"}, "technology")
- Expect(model).To(Equal("model-a"))
- })
-
- Context("when candidate models are empty", func() {
- It("should return the default model", func() {
- model := classifier.SelectBestModelFromList([]string{}, "technology")
- Expect(model).To(Equal("default-model"))
- })
- })
-
- Context("when category is not found", func() {
- It("should return the first candidate model", func() {
- model := classifier.SelectBestModelFromList([]string{"model-a"}, "non-existent-category")
- Expect(model).To(Equal("model-a"))
- })
- })
-
- Context("when the model is not in the candidate models", func() {
- It("should return the first candidate model", func() {
- model := classifier.SelectBestModelFromList([]string{"model-c"}, "technology")
- Expect(model).To(Equal("model-c"))
- })
- })
- })
-
- Describe("internal helper methods", func() {
- type row struct {
- query string
- want *config.Category
- }
-
- DescribeTable("find category",
- func(r row) {
- cat := classifier.findCategory(r.query)
- if r.want == nil {
- Expect(cat).To(BeNil())
- } else {
- Expect(cat.CategoryMetadata.Name).To(Equal(r.want.Name))
- }
- },
- Entry("should find category case-insensitively", row{query: "TECHNOLOGY", want: &config.Category{CategoryMetadata: config.CategoryMetadata{Name: "technology"}}}),
- Entry("should return nil for non-existent category", row{query: "non-existent", want: nil}),
- )
-
- Describe("select best model internal", func() {
- It("should select best model without filter", func() {
- cat := &config.Category{
- CategoryMetadata: config.CategoryMetadata{Name: "test"},
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- },
- }
-
- bestModel, score := classifier.selectBestModelInternal(cat, nil)
-
- Expect(bestModel).To(Equal("model-a"))
- Expect(score).To(BeNumerically("~", 0.9, 0.001))
- })
-
- It("should select best model with filter", func() {
- cat := &config.Category{
- CategoryMetadata: config.CategoryMetadata{Name: "test"},
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- {Model: "model-c", Score: 0.7},
- },
- }
- filter := func(model string) bool {
- return model == "model-b" || model == "model-c"
- }
-
- bestModel, score := classifier.selectBestModelInternal(cat, filter)
-
- Expect(bestModel).To(Equal("model-b"))
- Expect(score).To(BeNumerically("~", 0.8, 0.001))
- })
-
- It("should return empty when no models match filter", func() {
- cat := &config.Category{
- CategoryMetadata: config.CategoryMetadata{Name: "test"},
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- },
- }
- filter := func(model string) bool {
- return model == "non-existent-model"
- }
-
- bestModel, score := classifier.selectBestModelInternal(cat, filter)
-
- Expect(bestModel).To(Equal(""))
- Expect(score).To(BeNumerically("~", -1.0, 0.001))
- })
-
- It("should return empty when category has no models", func() {
- cat := &config.Category{
- CategoryMetadata: config.CategoryMetadata{Name: "test"},
- ModelScores: []config.ModelScore{},
- }
-
- bestModel, score := classifier.selectBestModelInternal(cat, nil)
-
- Expect(bestModel).To(Equal(""))
- Expect(score).To(BeNumerically("~", -1.0, 0.001))
- })
- })
- })
-})
-
type MockJailbreakInferenceResponse struct {
classifyResult candle_binding.ClassResult
classifyError error
@@ -1015,49 +582,6 @@ var _ = Describe("PII detection", func() {
})
})
-var _ = Describe("get models for category", func() {
- var c *Classifier
-
- BeforeEach(func() {
- c, _ = newClassifierWithOptions(&config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- Categories: []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{Name: "Toxicity"},
- ModelScores: []config.ModelScore{
- {Model: "m1"}, {Model: "m2"},
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "Toxicity"}, // duplicate name, should be ignored by "first wins"
- ModelScores: []config.ModelScore{{Model: "mX"}},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "Jailbreak"},
- ModelScores: []config.ModelScore{{Model: "jb1"}},
- },
- },
- },
- })
- })
-
- type row struct {
- query string
- want []string
- }
-
- DescribeTable("lookup behavior",
- func(r row) {
- got := c.GetModelsForCategory(r.query)
- Expect(got).To(Equal(r.want))
- },
-
- Entry("case-insensitive match", row{query: "toxicity", want: []string{"m1", "m2"}}),
- Entry("no match returns nil slice", row{query: "NotExist", want: nil}),
- Entry("another category", row{query: "JAILBREAK", want: []string{"jb1"}}),
- )
-})
-
func TestUpdateBestModel(t *testing.T) {
classifier := &Classifier{}
@@ -1075,40 +599,14 @@ func TestUpdateBestModel(t *testing.T) {
}
}
-func TestForEachModelScore(t *testing.T) {
- c := &Classifier{}
- cat := &config.Category{
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- {Model: "model-c", Score: 0.7},
- },
- }
-
- var models []string
- c.forEachModelScore(cat, func(ms config.ModelScore) {
- models = append(models, ms.Model)
- })
-
- expected := []string{"model-a", "model-b", "model-c"}
- if len(models) != len(expected) {
- t.Fatalf("expected %d models, got %d", len(expected), len(models))
- }
- for i, m := range expected {
- if models[i] != m {
- t.Errorf("expected model %s at index %d, got %s", m, i, models[i])
- }
- }
-}
-
// --- Current Regex Implementation ---
// This uses the currently modified keyword_classifier.go with regex matching.
func BenchmarkKeywordClassifierRegex(b *testing.B) {
rulesConfig := []config.KeywordRule{
- {Category: "cat-and", Operator: "AND", Keywords: []string{"apple", "banana"}, CaseSensitive: false},
- {Category: "cat-or", Operator: "OR", Keywords: []string{"orange", "grape"}, CaseSensitive: true},
- {Category: "cat-nor", Operator: "NOR", Keywords: []string{"disallowed"}, CaseSensitive: false},
+ {Name: "cat-and", Operator: "AND", Keywords: []string{"apple", "banana"}, CaseSensitive: false},
+ {Name: "cat-or", Operator: "OR", Keywords: []string{"orange", "grape"}, CaseSensitive: true},
+ {Name: "cat-nor", Operator: "NOR", Keywords: []string{"disallowed"}, CaseSensitive: false},
}
testTextAndMatch := "I like apple and banana"
@@ -1148,7 +646,7 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
// Scenario: Keywords with varying lengths
rulesConfigLongKeywords := []config.KeywordRule{
- {Category: "long-kw", Operator: "OR", Keywords: []string{"supercalifragilisticexpialidocious", "pneumonoultramicroscopicsilicovolcanoconiosis"}, CaseSensitive: false},
+ {Name: "long-kw", Operator: "OR", Keywords: []string{"supercalifragilisticexpialidocious", "pneumonoultramicroscopicsilicovolcanoconiosis"}, CaseSensitive: false},
}
classifierLongKeywords, err := NewKeywordClassifier(rulesConfigLongKeywords)
if err != nil {
@@ -1163,7 +661,7 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
// Scenario: Texts with varying lengths
rulesConfigShortText := []config.KeywordRule{
- {Category: "short-text", Operator: "OR", Keywords: []string{"short"}, CaseSensitive: false},
+ {Name: "short-text", Operator: "OR", Keywords: []string{"short"}, CaseSensitive: false},
}
classifierShortText, err := NewKeywordClassifier(rulesConfigShortText)
if err != nil {
@@ -1177,7 +675,7 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
})
rulesConfigLongText := []config.KeywordRule{
- {Category: "long-text", Operator: "OR", Keywords: []string{"endword"}, CaseSensitive: false},
+ {Name: "long-text", Operator: "OR", Keywords: []string{"endword"}, CaseSensitive: false},
}
classifierLongText, err := NewKeywordClassifier(rulesConfigLongText)
if err != nil {
@@ -1197,7 +695,7 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
manyKeywords[i] = fmt.Sprintf("keyword%d", i)
}
rulesConfigManyKeywords := []config.KeywordRule{
- {Category: "many-kw", Operator: "OR", Keywords: manyKeywords, CaseSensitive: false},
+ {Name: "many-kw", Operator: "OR", Keywords: manyKeywords, CaseSensitive: false},
}
classifierManyKeywords, err := NewKeywordClassifier(rulesConfigManyKeywords)
if err != nil {
@@ -1212,7 +710,7 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
// Scenario: Keywords with many escaped characters
rulesConfigComplexKeywords := []config.KeywordRule{
- {Category: "complex-kw", Operator: "OR", Keywords: []string{"user.name@domain.com", "C:\\Program Files\\"}, CaseSensitive: false},
+ {Name: "complex-kw", Operator: "OR", Keywords: []string{"user.name@domain.com", "C:\\Program Files\\"}, CaseSensitive: false},
}
classifierComplexKeywords, err := NewKeywordClassifier(rulesConfigComplexKeywords)
if err != nil {
@@ -1226,149 +724,6 @@ func BenchmarkKeywordClassifierRegex(b *testing.B) {
})
}
-var _ = Describe("generic category mapping (MMLU-Pro -> generic)", func() {
- var (
- classifier *Classifier
- mockCategoryInitializer *MockCategoryInitializer
- mockCategoryModel *MockCategoryInference
- )
-
- BeforeEach(func() {
- mockCategoryInitializer = &MockCategoryInitializer{InitError: nil}
- mockCategoryModel = &MockCategoryInference{}
-
- cfg := &config.RouterConfig{}
- cfg.CategoryModel.ModelID = "model-id"
- cfg.CategoryMappingPath = "category-mapping-path"
- cfg.CategoryModel.Threshold = 0.5
-
- // Define generic categories with MMLU-Pro mappings
- cfg.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "tech",
- MMLUCategories: []string{"computer science", "engineering"},
- },
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- ReasoningEffort: "low",
- },
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "finance",
- MMLUCategories: []string{"economics"},
- },
- ModelScores: []config.ModelScore{{
- Model: "gemma3:27b",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- },
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "politics",
- // No explicit mmlu_categories -> identity fallback when label exists in mapping
- },
- ModelScores: []config.ModelScore{{
- Model: "gemma3:27b",
- Score: 0.6,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- },
- }
-
- // Category mapping represents labels coming from the MMLU-Pro model
- categoryMapping := &CategoryMapping{
- CategoryToIdx: map[string]int{
- "computer science": 0,
- "economics": 1,
- "politics": 2,
- },
- IdxToCategory: map[string]string{
- "0": "Computer Science", // different case to assert case-insensitive mapping
- "1": "economics",
- "2": "politics",
- },
- }
-
- var err error
- classifier, err = newClassifierWithOptions(
- cfg,
- withCategory(categoryMapping, mockCategoryInitializer, mockCategoryModel),
- )
- Expect(err).ToNot(HaveOccurred())
- })
-
- It("builds expected MMLU<->generic maps", func() {
- Expect(classifier.MMLUToGeneric).To(HaveKeyWithValue("computer science", "tech"))
- Expect(classifier.MMLUToGeneric).To(HaveKeyWithValue("engineering", "tech"))
- Expect(classifier.MMLUToGeneric).To(HaveKeyWithValue("economics", "finance"))
- // identity fallback for a generic name that exists as an MMLU label
- Expect(classifier.MMLUToGeneric).To(HaveKeyWithValue("politics", "politics"))
-
- Expect(classifier.GenericToMMLU).To(HaveKey("tech"))
- Expect(classifier.GenericToMMLU["tech"]).To(ConsistOf("computer science", "engineering"))
- Expect(classifier.GenericToMMLU).To(HaveKeyWithValue("finance", ConsistOf("economics")))
- Expect(classifier.GenericToMMLU).To(HaveKeyWithValue("politics", ConsistOf("politics")))
- })
-
- It("translates ClassifyCategoryWithEntropy result to generic category", func() {
- // Model returns class index 0 -> "Computer Science" (MMLU) which maps to generic "tech"
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 0,
- Confidence: 0.92,
- Probabilities: []float32{0.92, 0.05, 0.03},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("This text is about GPUs and compilers")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("tech"))
- Expect(score).To(BeNumerically("~", 0.92, 0.001))
- })
-
- It("translates names in entropy flow and returns generic top category", func() {
- // Probabilities favor index 0 -> generic should be "tech"
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 0,
- Confidence: 0.88,
- Probabilities: []float32{0.7, 0.2, 0.1},
- NumClasses: 3,
- }
-
- category, confidence, decision, err := classifier.ClassifyCategoryWithEntropy("Economic policies in computer science education")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("tech"))
- Expect(confidence).To(BeNumerically("~", 0.88, 0.001))
- Expect(decision.TopCategories).ToNot(BeEmpty())
- Expect(decision.TopCategories[0].Category).To(Equal("tech"))
- })
-
- It("falls back to identity when no mapping exists for an MMLU label", func() {
- // index 2 -> "politics" (no explicit mapping provided, but present in MMLU set)
- mockCategoryModel.classifyWithProbsResult = candle_binding.ClassResultWithProbs{
- Class: 2,
- Confidence: 0.91,
- Probabilities: []float32{0.04, 0.05, 0.91},
- NumClasses: 3,
- }
-
- category, score, _, err := classifier.ClassifyCategoryWithEntropy("This is a political debate")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("politics"))
- Expect(score).To(BeNumerically("~", 0.91, 0.001))
- })
-})
-
func TestKeywordClassifier(t *testing.T) {
tests := []struct {
name string
@@ -1383,12 +738,12 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-1",
rules: []config.KeywordRule{
{
- Category: "test-category-1",
+ Name: "test-category-1",
Operator: "AND",
Keywords: []string{"keyword1", "keyword2"},
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1400,12 +755,12 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-3", // Falls through to NOR
rules: []config.KeywordRule{
{
- Category: "test-category-1",
+ Name: "test-category-1",
Operator: "AND",
Keywords: []string{"keyword1", "keyword2"},
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1417,13 +772,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-2",
rules: []config.KeywordRule{
{
- Category: "test-category-2",
+ Name: "test-category-2",
Operator: "OR",
Keywords: []string{"keyword3", "keyword4"},
CaseSensitive: true,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1435,13 +790,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-3", // Falls through to NOR
rules: []config.KeywordRule{
{
- Category: "test-category-2",
+ Name: "test-category-2",
Operator: "OR",
Keywords: []string{"keyword3", "keyword4"},
CaseSensitive: true,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1453,7 +808,7 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-3",
rules: []config.KeywordRule{
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1465,7 +820,7 @@ func TestKeywordClassifier(t *testing.T) {
expected: "", // Fails NOR, and no other rules match
rules: []config.KeywordRule{
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1477,13 +832,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-3", // Fails case-sensitive OR, falls through to NOR
rules: []config.KeywordRule{
{
- Category: "test-category-2",
+ Name: "test-category-2",
Operator: "OR",
Keywords: []string{"keyword3", "keyword4"},
CaseSensitive: true,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1495,13 +850,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-3", // "secret" rule (test-category-secret) won't match, falls through to NOR
rules: []config.KeywordRule{
{
- Category: "test-category-secret",
+ Name: "test-category-secret",
Operator: "OR",
Keywords: []string{"secret"},
CaseSensitive: false,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1513,13 +868,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-secret", // Should match new "secret" rule
rules: []config.KeywordRule{
{
- Category: "test-category-secret",
+ Name: "test-category-secret",
Operator: "OR",
Keywords: []string{"secret"},
CaseSensitive: false,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1531,13 +886,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-dot", // Should match new "1.0" rule
rules: []config.KeywordRule{
{
- Category: "test-category-dot",
+ Name: "test-category-dot",
Operator: "OR",
Keywords: []string{"1.0"},
CaseSensitive: false,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1549,13 +904,13 @@ func TestKeywordClassifier(t *testing.T) {
expected: "test-category-asterisk", // Should match new "*" rule
rules: []config.KeywordRule{
{
- Category: "test-category-asterisk",
+ Name: "test-category-asterisk",
Operator: "OR",
Keywords: []string{"*"},
CaseSensitive: false,
},
{
- Category: "test-category-3",
+ Name: "test-category-3",
Operator: "NOR",
Keywords: []string{"keyword5", "keyword6"},
},
@@ -1565,7 +920,7 @@ func TestKeywordClassifier(t *testing.T) {
name: "Unsupported operator should return error",
rules: []config.KeywordRule{
{
- Category: "bad-operator",
+ Name: "bad-operator",
Operator: "UNKNOWN", // Invalid operator
Keywords: []string{"test"},
},
@@ -2308,233 +1663,6 @@ var _ = Describe("MCP Category Classifier", func() {
})
})
-var _ = Describe("Classifier MCP Methods", func() {
- var (
- classifier *Classifier
- mockClient *MockMCPClient
- )
-
- BeforeEach(func() {
- mockClient = &MockMCPClient{}
- cfg := &config.RouterConfig{}
- cfg.MCPCategoryModel.Enabled = true
- cfg.MCPCategoryModel.ToolName = "classify_text"
- cfg.MCPCategoryModel.Threshold = 0.5
- cfg.MCPCategoryModel.TimeoutSeconds = 30
-
- // Add Categories configuration for entropy-based tests
- cfg.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{Name: "tech"},
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(false)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "sports"},
- ModelScores: []config.ModelScore{{
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(false)},
- }},
- },
- {
- CategoryMetadata: config.CategoryMetadata{Name: "politics"},
- ModelScores: []config.ModelScore{{
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{UseReasoning: lo.ToPtr(true)},
- }},
- },
- }
-
- // Create MCP classifier manually and inject mock client
- mcpClassifier := &MCPCategoryClassifier{
- client: mockClient,
- toolName: "classify_text",
- config: cfg,
- }
-
- classifier = &Classifier{
- Config: cfg,
- mcpCategoryInitializer: mcpClassifier,
- mcpCategoryInference: mcpClassifier,
- CategoryMapping: &CategoryMapping{
- CategoryToIdx: map[string]int{"tech": 0, "sports": 1, "politics": 2},
- IdxToCategory: map[string]string{"0": "tech", "1": "sports", "2": "politics"},
- CategorySystemPrompts: map[string]string{
- "tech": "You are a technology expert. Include practical examples.",
- "sports": "You are a sports expert. Provide game analysis.",
- "politics": "You are a politics expert. Provide balanced perspectives.",
- },
- CategoryDescriptions: map[string]string{
- "tech": "Technology and computing topics",
- "sports": "Sports and athletics",
- "politics": "Political topics and governance",
- },
- },
- }
- })
-
- Describe("IsMCPCategoryEnabled", func() {
- It("should return true when properly configured", func() {
- Expect(classifier.IsMCPCategoryEnabled()).To(BeTrue())
- })
-
- It("should return false when not enabled", func() {
- classifier.Config.MCPCategoryModel.Enabled = false
- Expect(classifier.IsMCPCategoryEnabled()).To(BeFalse())
- })
-
- // Note: tool_name is now optional and will be auto-discovered if not specified.
- // IsMCPCategoryEnabled only checks if MCP is enabled, not specific configuration details.
- // Runtime checks (like initializer != nil or successful connection) are handled
- // separately in the actual initialization and classification methods.
- })
-
- Describe("classifyCategoryMCP", func() {
- Context("when MCP is not enabled", func() {
- It("should return error", func() {
- classifier.Config.MCPCategoryModel.Enabled = false
- _, _, err := classifier.classifyCategoryMCP("test text")
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("not properly configured"))
- })
- })
-
- Context("when classification succeeds with high confidence", func() {
- It("should return category name", func() {
- mockClient.callToolResult = &mcp.CallToolResult{
- IsError: false,
- Content: []mcp.Content{
- mcp.TextContent{
- Type: "text",
- Text: `{"class": 2, "confidence": 0.95, "model": "openai/gpt-oss-20b", "use_reasoning": true}`,
- },
- },
- }
-
- category, confidence, err := classifier.classifyCategoryMCP("test text")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("politics"))
- Expect(confidence).To(BeNumerically("~", 0.95, 0.001))
- })
- })
-
- Context("when confidence is below threshold", func() {
- It("should return empty category", func() {
- mockClient.callToolResult = &mcp.CallToolResult{
- IsError: false,
- Content: []mcp.Content{
- mcp.TextContent{
- Type: "text",
- Text: `{"class": 1, "confidence": 0.3, "model": "openai/gpt-oss-20b", "use_reasoning": false}`,
- },
- },
- }
-
- category, confidence, err := classifier.classifyCategoryMCP("test text")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal(""))
- Expect(confidence).To(BeNumerically("~", 0.3, 0.001))
- })
- })
-
- Context("when class index is not in mapping", func() {
- It("should return generic category name", func() {
- mockClient.callToolResult = &mcp.CallToolResult{
- IsError: false,
- Content: []mcp.Content{
- mcp.TextContent{
- Type: "text",
- Text: `{"class": 99, "confidence": 0.85, "model": "openai/gpt-oss-20b", "use_reasoning": true}`,
- },
- },
- }
-
- category, confidence, err := classifier.classifyCategoryMCP("test text")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("category_99"))
- Expect(confidence).To(BeNumerically("~", 0.85, 0.001))
- })
- })
-
- Context("when MCP call fails", func() {
- It("should return error", func() {
- mockClient.callToolError = errors.New("network error")
-
- _, _, err := classifier.classifyCategoryMCP("test text")
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("MCP tool call failed"))
- })
- })
- })
-
- Describe("classifyCategoryWithEntropyMCP", func() {
- Context("when MCP returns probabilities", func() {
- It("should return category with entropy decision", func() {
- mockClient.callToolResult = &mcp.CallToolResult{
- IsError: false,
- Content: []mcp.Content{
- mcp.TextContent{
- Type: "text",
- Text: `{"class": 2, "confidence": 0.95, "probabilities": [0.02, 0.03, 0.95], "model": "openai/gpt-oss-20b", "use_reasoning": true}`,
- },
- },
- }
-
- category, confidence, reasoningDecision, err := classifier.classifyCategoryWithEntropyMCP("test text")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal("politics"))
- Expect(confidence).To(BeNumerically("~", 0.95, 0.001))
- Expect(len(reasoningDecision.TopCategories)).To(BeNumerically(">", 0))
- })
- })
-
- Context("when confidence is below threshold", func() {
- It("should return empty category but provide entropy decision", func() {
- mockClient.callToolResult = &mcp.CallToolResult{
- IsError: false,
- Content: []mcp.Content{
- mcp.TextContent{
- Type: "text",
- Text: `{"class": 0, "confidence": 0.3, "probabilities": [0.3, 0.35, 0.35], "model": "openai/gpt-oss-20b", "use_reasoning": false}`,
- },
- },
- }
-
- category, confidence, reasoningDecision, err := classifier.classifyCategoryWithEntropyMCP("test text")
- Expect(err).ToNot(HaveOccurred())
- Expect(category).To(Equal(""))
- Expect(confidence).To(BeNumerically("~", 0.3, 0.001))
- Expect(len(reasoningDecision.TopCategories)).To(BeNumerically(">", 0))
- })
- })
- })
-
- Describe("initializeMCPCategoryClassifier", func() {
- Context("when MCP is not enabled", func() {
- It("should return error", func() {
- classifier.Config.MCPCategoryModel.Enabled = false
- err := classifier.initializeMCPCategoryClassifier()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("not properly configured"))
- })
- })
-
- Context("when initializer is nil", func() {
- It("should return error", func() {
- classifier.mcpCategoryInitializer = nil
- err := classifier.initializeMCPCategoryClassifier()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("initializer is not set"))
- })
- })
- })
-})
-
var _ = Describe("MCP Helper Functions", func() {
Describe("createMCPCategoryInitializer", func() {
It("should create MCPCategoryClassifier", func() {
@@ -3580,12 +2708,10 @@ var _ = Describe("EmbeddingClassifier", func() {
}
rules := []config.EmbeddingRule{{
- Category: "cat1",
- Keywords: []string{"science", "math"},
+ Name: "cat1",
+ Candidates: []string{"science", "math"},
AggregationMethodConfiged: config.AggregationMethodMean,
SimilarityThreshold: 0.8,
- Model: "auto",
- Dimension: 768,
}}
clf, err := NewEmbeddingClassifier(rules)
@@ -3603,12 +2729,10 @@ var _ = Describe("EmbeddingClassifier", func() {
}
rules := []config.EmbeddingRule{{
- Category: "cat2",
- Keywords: []string{"x", "y"},
+ Name: "cat2",
+ Candidates: []string{"x", "y"},
AggregationMethodConfiged: config.AggregationMethodMax,
SimilarityThreshold: 0.5,
- Model: "auto",
- Dimension: 512,
}}
clf, err := NewEmbeddingClassifier(rules)
@@ -3626,12 +2750,10 @@ var _ = Describe("EmbeddingClassifier", func() {
}
rules := []config.EmbeddingRule{{
- Category: "cat3",
- Keywords: []string{"p", "q"},
+ Name: "cat3",
+ Candidates: []string{"p", "q"},
AggregationMethodConfiged: config.AggregationMethodAny,
SimilarityThreshold: 0.7,
- Model: "auto",
- Dimension: 256,
}}
clf, err := NewEmbeddingClassifier(rules)
@@ -3649,12 +2771,10 @@ var _ = Describe("EmbeddingClassifier", func() {
}
rules := []config.EmbeddingRule{{
- Category: "cat4",
- Keywords: []string{"z"},
+ Name: "cat4",
+ Candidates: []string{"z"},
AggregationMethodConfiged: config.AggregationMethodMean,
SimilarityThreshold: 0.1,
- Model: "auto",
- Dimension: 768,
}}
clf, err := NewEmbeddingClassifier(rules)
diff --git a/src/semantic-router/pkg/classification/embedding_classifier.go b/src/semantic-router/pkg/classification/embedding_classifier.go
index ecf491f2b..363227484 100644
--- a/src/semantic-router/pkg/classification/embedding_classifier.go
+++ b/src/semantic-router/pkg/classification/embedding_classifier.go
@@ -59,25 +59,25 @@ func (c *Classifier) initializeKeywordEmbeddingClassifier() error {
// Classify performs keyword-based embedding similarity classification on the given text.
func (c *EmbeddingClassifier) Classify(text string) (string, float64, error) {
var bestScore float32
- var mostMatchedCategory string
+ var mostMatchedRule string
for _, rule := range c.rules {
matched, aggregatedScore, err := c.matches(text, rule) // Error handled
if err != nil {
return "", 0.0, err // Propagate error
}
if matched {
- if len(rule.Keywords) > 0 {
- logging.Infof("Keyword-based embedding similarity classification matched category %q with keywords: %v, confidence score %s", rule.Category, rule.Keywords, aggregatedScore)
+ if len(rule.Candidates) > 0 {
+ logging.Infof("Keyword-based embedding similarity classification matched rule %q with candidates: %v, confidence score %s", rule.Name, rule.Candidates, aggregatedScore)
} else {
- logging.Infof("Keyword-based embedding similarity classification do not match category %q with keywords: %v, confidence score %s", rule.Category, rule.Keywords, aggregatedScore)
+ logging.Infof("Keyword-based embedding similarity classification do not match rule %q with candidates: %v, confidence score %s", rule.Name, rule.Candidates, aggregatedScore)
}
if aggregatedScore > bestScore {
bestScore = aggregatedScore
- mostMatchedCategory = rule.Category
+ mostMatchedRule = rule.Name
}
}
}
- return mostMatchedCategory, float64(bestScore), nil
+ return mostMatchedRule, float64(bestScore), nil
}
// matches checks if the text matches the given keyword rule.
@@ -86,30 +86,17 @@ func (c *EmbeddingClassifier) matches(text string, rule config.EmbeddingRule) (b
if text == "" {
return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: query must be provided")
}
- if len(rule.Keywords) == 0 {
- return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: keywords must be provided")
- }
- // Set defaults
- if rule.Dimension == 0 {
- rule.Dimension = 768 // Default to full dimension
- }
- if rule.Model == "auto" && rule.QualityPriority == 0 && rule.LatencyPriority == 0 {
- rule.QualityPriority = 0.5
- rule.LatencyPriority = 0.5
+ if len(rule.Candidates) == 0 {
+ return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: candidates must be provided")
}
- // Validate dimension
- validDimensions := map[int]bool{128: true, 256: true, 512: true, 768: true, 1024: true}
- if !validDimensions[rule.Dimension] {
- return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: dimension must be one of: 128, 256, 512, 768, 1024 (got %d)", rule.Dimension)
- }
- // Calculate batch similarity
+ // Calculate batch similarity using default model (auto) and dimension (768)
result, err := calculateSimilarityBatch(
text,
- rule.Keywords,
- 0, // return scores for all the keywords
- rule.Model,
- rule.Dimension,
+ rule.Candidates,
+ 0, // return scores for all the candidates
+ "auto", // use auto model selection
+ 768, // use default dimension
)
if err != nil {
return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: failed to calculate batch similarity: %w", err)
diff --git a/src/semantic-router/pkg/classification/keyword_classifier.go b/src/semantic-router/pkg/classification/keyword_classifier.go
index ce84ed37f..a2a47ac50 100644
--- a/src/semantic-router/pkg/classification/keyword_classifier.go
+++ b/src/semantic-router/pkg/classification/keyword_classifier.go
@@ -11,7 +11,7 @@ import (
// preppedKeywordRule stores preprocessed keywords for efficient matching.
type preppedKeywordRule struct {
- Category string
+ Name string // Name is also used as category
Operator string
CaseSensitive bool
OriginalKeywords []string // For logging/returning original case
@@ -33,11 +33,11 @@ func NewKeywordClassifier(cfgRules []config.KeywordRule) (*KeywordClassifier, er
case "AND", "OR", "NOR":
// Valid operator
default:
- return nil, fmt.Errorf("unsupported keyword rule operator: %q for category %q", rule.Operator, rule.Category)
+ return nil, fmt.Errorf("unsupported keyword rule operator: %q for rule %q", rule.Operator, rule.Name)
}
preppedRule := preppedKeywordRule{
- Category: rule.Category,
+ Name: rule.Name,
Operator: rule.Operator,
CaseSensitive: rule.CaseSensitive,
OriginalKeywords: rule.Keywords,
@@ -94,11 +94,11 @@ func (c *KeywordClassifier) Classify(text string) (string, float64, error) {
}
if matched {
if len(keywords) > 0 {
- logging.Infof("Keyword-based classification matched category %q with keywords: %v", rule.Category, keywords)
+ logging.Infof("Keyword-based classification matched rule %q with keywords: %v", rule.Name, keywords)
} else {
- logging.Infof("Keyword-based classification matched category %q with a NOR rule.", rule.Category)
+ logging.Infof("Keyword-based classification matched rule %q with a NOR rule.", rule.Name)
}
- return rule.Category, 1.0, nil
+ return rule.Name, 1.0, nil
}
}
return "", 0.0, nil
@@ -120,7 +120,7 @@ func (c *KeywordClassifier) matches(text string, rule preppedKeywordRule) (bool,
case "AND":
for i, re := range regexpsToUse {
if re == nil {
- return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
+ return false, nil, fmt.Errorf("nil regular expression found in rule %q at index %d. This indicates a failed compilation during initialization", rule.Name, i)
}
if !re.MatchString(text) {
return false, nil, nil
@@ -131,7 +131,7 @@ func (c *KeywordClassifier) matches(text string, rule preppedKeywordRule) (bool,
case "OR":
for i, re := range regexpsToUse {
if re == nil {
- return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
+ return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Name, i)
}
if re.MatchString(text) {
return true, []string{rule.OriginalKeywords[i]}, nil
@@ -141,7 +141,7 @@ func (c *KeywordClassifier) matches(text string, rule preppedKeywordRule) (bool,
case "NOR":
for i, re := range regexpsToUse {
if re == nil {
- return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
+ return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Name, i)
}
if re.MatchString(text) {
return false, nil, nil
diff --git a/src/semantic-router/pkg/classification/keyword_entropy_test.go b/src/semantic-router/pkg/classification/keyword_entropy_test.go
index 0e875e30c..6934b7082 100644
--- a/src/semantic-router/pkg/classification/keyword_entropy_test.go
+++ b/src/semantic-router/pkg/classification/keyword_entropy_test.go
@@ -1,138 +1 @@
package classification
-
-import (
- "testing"
-
- "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
-)
-
-// TestKeywordClassifierWithEntropyReasoningDecision tests that keyword classifier
-// returns proper reasoning decisions based on category configuration
-func TestKeywordClassifierWithEntropyReasoningDecision(t *testing.T) {
- // Create a test configuration
- keywordRules := []config.KeywordRule{
- {
- Category: "urgent_request",
- Operator: "OR",
- Keywords: []string{"urgent", "immediate", "asap"},
- CaseSensitive: false,
- },
- {
- Category: "thinking",
- Operator: "OR",
- Keywords: []string{"think", "analyze", "reason"},
- CaseSensitive: false,
- },
- }
-
- categories := []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "urgent_request",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "fast-model",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: boolPtr(false), // No reasoning for urgent requests
- },
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "thinking",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "smart-model",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: boolPtr(true), // Enable reasoning for thinking tasks
- },
- },
- },
- },
- }
-
- cfg := &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- KeywordRules: keywordRules,
- Categories: categories,
- },
- }
-
- // Create classifier with keyword rules
- keywordClassifier, err := NewKeywordClassifier(keywordRules)
- if err != nil {
- t.Fatalf("Failed to create keyword classifier: %v", err)
- }
-
- classifier := &Classifier{
- Config: cfg,
- keywordClassifier: keywordClassifier,
- }
-
- // Test cases
- tests := []struct {
- name string
- text string
- expectedCategory string
- expectedUseReasoning bool
- expectedConfidence float64
- shouldMatch bool
- }{
- {
- name: "Urgent request - no reasoning",
- text: "This is an urgent request",
- expectedCategory: "urgent_request",
- expectedUseReasoning: false,
- expectedConfidence: 1.0,
- shouldMatch: true,
- },
- {
- name: "Thinking task - with reasoning",
- text: "Please think carefully about this problem",
- expectedCategory: "thinking",
- expectedUseReasoning: true,
- expectedConfidence: 1.0,
- shouldMatch: true,
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy(tt.text)
- if err != nil {
- t.Fatalf("Classification failed: %v", err)
- }
-
- if category != tt.expectedCategory {
- t.Errorf("Expected category %q, got %q", tt.expectedCategory, category)
- }
- if confidence != tt.expectedConfidence {
- t.Errorf("Expected confidence %f, got %f", tt.expectedConfidence, confidence)
- }
- if reasoningDecision.UseReasoning != tt.expectedUseReasoning {
- t.Errorf("Expected useReasoning %v, got %v", tt.expectedUseReasoning, reasoningDecision.UseReasoning)
- }
- if reasoningDecision.Confidence != 1.0 {
- t.Errorf("Expected reasoning decision confidence 1.0, got %f", reasoningDecision.Confidence)
- }
- if reasoningDecision.DecisionReason != "keyword_match_category_config" {
- t.Errorf("Expected decision reason 'keyword_match_category_config', got %q", reasoningDecision.DecisionReason)
- }
- if len(reasoningDecision.TopCategories) != 1 {
- t.Errorf("Expected 1 top category, got %d", len(reasoningDecision.TopCategories))
- } else if reasoningDecision.TopCategories[0].Category != tt.expectedCategory {
- t.Errorf("Expected top category %q, got %q", tt.expectedCategory, reasoningDecision.TopCategories[0].Category)
- }
- })
- }
-}
-
-// Helper function to create bool pointer
-func boolPtr(b bool) *bool {
- return &b
-}
diff --git a/src/semantic-router/pkg/classification/mcp_classifier.go b/src/semantic-router/pkg/classification/mcp_classifier.go
index 9c2504042..0c98cd3a0 100644
--- a/src/semantic-router/pkg/classification/mcp_classifier.go
+++ b/src/semantic-router/pkg/classification/mcp_classifier.go
@@ -401,123 +401,6 @@ func (c *Classifier) initializeMCPCategoryClassifier() error {
return nil
}
-// classifyCategoryMCP performs category classification using MCP
-func (c *Classifier) classifyCategoryMCP(text string) (string, float64, error) {
- result, err := c.classifyCategoryMCPWithRouting(text)
- if err != nil {
- return "", 0.0, err
- }
- return result.CategoryName, float64(result.Confidence), nil
-}
-
-// classifyCategoryMCPWithRouting performs category classification using MCP and returns routing information
-func (c *Classifier) classifyCategoryMCPWithRouting(text string) (*MCPClassificationResult, error) {
- if !c.IsMCPCategoryEnabled() {
- return nil, fmt.Errorf("MCP category classification is not properly configured")
- }
-
- if c.mcpCategoryInference == nil {
- return nil, fmt.Errorf("MCP category inference is not initialized")
- }
-
- // Create context with timeout
- ctx := context.Background()
- if c.Config.MCPCategoryModel.TimeoutSeconds > 0 {
- var cancel context.CancelFunc
- ctx, cancel = context.WithTimeout(ctx, time.Duration(c.Config.MCPCategoryModel.TimeoutSeconds)*time.Second)
- defer cancel()
- }
-
- // Classify via MCP - need to call the raw client to get model/reasoning info
- start := time.Now()
-
- // Get MCP classifier to access raw response
- mcpClassifier, ok := c.mcpCategoryInference.(*MCPCategoryClassifier)
- if !ok {
- return nil, fmt.Errorf("MCP category inference is not MCPCategoryClassifier type")
- }
-
- // Call MCP tool directly to get full response
- arguments := map[string]interface{}{
- "text": text,
- }
-
- mcpResult, err := mcpClassifier.client.CallTool(ctx, mcpClassifier.toolName, arguments)
- metrics.RecordClassifierLatency("category_mcp", time.Since(start).Seconds())
-
- if err != nil {
- return nil, fmt.Errorf("MCP tool call failed: %w", err)
- }
-
- if mcpResult.IsError {
- return nil, fmt.Errorf("MCP tool returned error: %v", mcpResult.Content)
- }
-
- if len(mcpResult.Content) == 0 {
- return nil, fmt.Errorf("MCP tool returned empty content")
- }
-
- // Extract text content
- var responseText string
- firstContent := mcpResult.Content[0]
- if textContent, ok := mcp.AsTextContent(firstContent); ok {
- responseText = textContent.Text
- } else {
- return nil, fmt.Errorf("MCP tool returned non-text content")
- }
-
- // Parse JSON response with routing information using the API type
- var response api.ClassifyResponse
- if err := json.Unmarshal([]byte(responseText), &response); err != nil {
- return nil, fmt.Errorf("failed to parse MCP response: %w", err)
- }
-
- logging.Infof("MCP classification result: class=%d, confidence=%.4f, model=%s, use_reasoning=%v",
- response.Class, response.Confidence, response.Model, response.UseReasoning)
-
- // Check threshold
- threshold := c.Config.MCPCategoryModel.Threshold
- if threshold == 0 {
- threshold = DefaultMCPThreshold
- }
-
- if response.Confidence < threshold {
- logging.Infof("MCP classification confidence (%.4f) below threshold (%.4f)",
- response.Confidence, threshold)
- return &MCPClassificationResult{
- Class: response.Class,
- Confidence: response.Confidence,
- Model: response.Model,
- UseReasoning: response.UseReasoning,
- }, nil
- }
-
- // Map class index to category name
- var categoryName string
- if c.CategoryMapping != nil {
- name, ok := c.CategoryMapping.GetCategoryFromIndex(response.Class)
- if ok {
- categoryName = c.translateMMLUToGeneric(name)
- } else {
- categoryName = fmt.Sprintf("category_%d", response.Class)
- }
- } else {
- categoryName = fmt.Sprintf("category_%d", response.Class)
- }
-
- metrics.RecordCategoryClassification(categoryName)
- logging.Infof("MCP classified as category: %s (class=%d), routing: model=%s, reasoning=%v",
- categoryName, response.Class, response.Model, response.UseReasoning)
-
- return &MCPClassificationResult{
- Class: response.Class,
- Confidence: response.Confidence,
- CategoryName: categoryName,
- Model: response.Model,
- UseReasoning: response.UseReasoning,
- }, nil
-}
-
// classifyCategoryWithEntropyMCP performs category classification with entropy using MCP
func (c *Classifier) classifyCategoryWithEntropyMCP(text string) (string, float64, entropy.ReasoningDecision, error) {
if !c.IsMCPCategoryEnabled() {
@@ -562,14 +445,14 @@ func (c *Classifier) classifyCategoryWithEntropyMCP(text string) (string, float6
}
}
- // Build category reasoning map from configuration
+ // Build category reasoning map from decisions configuration
categoryReasoningMap := make(map[string]bool)
- for _, category := range c.Config.Categories {
+ for _, decision := range c.Config.Decisions {
useReasoning := false
- if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
- useReasoning = *category.ModelScores[0].UseReasoning
+ if len(decision.ModelRefs) > 0 && decision.ModelRefs[0].UseReasoning != nil {
+ useReasoning = *decision.ModelRefs[0].UseReasoning
}
- categoryReasoningMap[strings.ToLower(category.Name)] = useReasoning
+ categoryReasoningMap[strings.ToLower(decision.Name)] = useReasoning
}
// Determine threshold
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
index cb1546edd..e26b156ae 100644
--- a/src/semantic-router/pkg/config/config.go
+++ b/src/semantic-router/pkg/config/config.go
@@ -1,7 +1,29 @@
package config
+import (
+ "encoding/json"
+ "fmt"
+
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
+)
+
+// ConfigSource defines where to load dynamic configuration from
+type ConfigSource string
+
+const (
+ // ConfigSourceFile loads configuration from file (default)
+ ConfigSourceFile ConfigSource = "file"
+ // ConfigSourceKubernetes loads configuration from Kubernetes CRDs
+ ConfigSourceKubernetes ConfigSource = "kubernetes"
+)
+
// RouterConfig represents the main configuration for the LLM Router
type RouterConfig struct {
+ // ConfigSource specifies where to load dynamic configuration from (file or kubernetes)
+ // +optional
+ // +kubebuilder:default=file
+ ConfigSource ConfigSource `yaml:"config_source,omitempty"`
+
/*
Static: Global Configuration
Timing: Should be handled when starting the router.
@@ -84,9 +106,17 @@ type IntelligentRouting struct {
// Embedding-based classification rules
EmbeddingRules []EmbeddingRule `yaml:"embedding_rules,omitempty"`
- // Categories for routing queries
+ // Categories for domain classification (only metadata, used by domain rules)
Categories []Category `yaml:"categories"`
+ // Decisions for routing logic (combines rules with AND/OR operators)
+ Decisions []Decision `yaml:"decisions,omitempty"`
+
+ // Strategy for selecting decision when multiple decisions match
+ // "priority" - select decision with highest priority
+ // "confidence" - select decision with highest confidence score
+ Strategy string `yaml:"strategy,omitempty"`
+
// Reasoning mode configuration
ReasoningConfig `yaml:",inline"`
}
@@ -196,7 +226,7 @@ type SemanticCache struct {
// KeywordRule defines a rule for keyword-based classification.
type KeywordRule struct {
- Category string `yaml:"category"`
+ Name string `yaml:"name"` // Name is also used as category
Operator string `yaml:"operator"`
Keywords []string `yaml:"keywords"`
CaseSensitive bool `yaml:"case_sensitive"`
@@ -213,14 +243,10 @@ const (
// EmbeddingRule defines a rule for keyword embedding based similarity match rule.
type EmbeddingRule struct {
- Category string `yaml:"category"`
+ Name string `yaml:"name"` // Name is also used as category
SimilarityThreshold float32 `yaml:"threshold"`
- Keywords []string `yaml:"keywords"`
+ Candidates []string `yaml:"candidates"` // Renamed from Keywords
AggregationMethodConfiged AggregationMethod `yaml:"aggregation_method"`
- Model string `json:"model,omitempty"` // "auto" (default), "qwen3", "gemma"
- Dimension int `json:"dimension,omitempty"` // Target dimension: 768 (default), 512, 256, 128
- QualityPriority float32 `json:"quality_priority,omitempty"` // 0.0-1.0, only for "auto" model
- LatencyPriority float32 `json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model
}
// APIConfig represents configuration for API endpoints
@@ -385,9 +411,6 @@ type ModelPricing struct {
}
type ModelParams struct {
- // PII policy configuration for this model
- PIIPolicy PIIPolicy `yaml:"pii_policy,omitempty"`
-
// Preferred endpoints for this model (optional)
PreferredEndpoints []string `yaml:"preferred_endpoints,omitempty"`
@@ -449,28 +472,276 @@ const (
)
// Category represents a category for routing queries
+// Category represents a domain category (only metadata, used by domain rules)
type Category struct {
// Metadata
CategoryMetadata `yaml:",inline"`
- // Domain-aware policies
- DomainAwarePolicies `yaml:",inline"`
- // Model scores for this category
- ModelScores []ModelScore `yaml:"model_scores"`
}
-// ModelScore associates an LLM with its selection weight and reasoning flag within a category.
-type ModelScore struct {
- Model string `yaml:"model"`
- Score float64 `yaml:"score"`
+// Decision represents a routing decision that combines multiple rules with AND/OR logic
+type Decision struct {
+ // Name is the unique identifier for this decision
+ Name string `yaml:"name"`
+
+ // Description provides information about what this decision handles
+ Description string `yaml:"description,omitempty"`
+
+ // Priority is used when strategy is "priority" - higher priority decisions are preferred
+ Priority int `yaml:"priority,omitempty"`
+
+ // Rules defines the combination of keyword/embedding/domain rules using AND/OR logic
+ Rules RuleCombination `yaml:"rules"`
+
+ // ModelRefs contains model references for this decision (currently only supports one model)
+ ModelRefs []ModelRef `yaml:"modelRefs,omitempty"`
+
+ // Plugins contains policy configurations applied after rule matching
+ Plugins []DecisionPlugin `yaml:"plugins,omitempty"`
+}
+
+// ModelRef represents a reference to a model (without score field)
+type ModelRef struct {
+ Model string `yaml:"model"`
// Optional LoRA adapter name - when specified, this LoRA adapter name will be used
// as the final model name in requests instead of the base model name.
- // This enables intent-aware LoRA routing where different LoRA adapters can be
- // selected based on the classified category.
LoRAName string `yaml:"lora_name,omitempty"`
// Reasoning mode control on Model Level
ModelReasoningControl `yaml:",inline"`
}
+// DecisionPlugin represents a plugin configuration for a decision
+type DecisionPlugin struct {
+ // Type specifies the plugin type: "semantic-cache", "jailbreak", "pii", "system_prompt", "header_mutation"
+ Type string `yaml:"type" json:"type"`
+
+ // Configuration is the raw configuration for this plugin
+ // The structure depends on the plugin type
+ // When loaded from YAML, this will be a map[string]interface{}
+ // When loaded from Kubernetes CRD, this will be []byte (from runtime.RawExtension)
+ Configuration interface{} `yaml:"configuration,omitempty" json:"configuration,omitempty"`
+}
+
+// Plugin configuration structures for unmarshaling
+
+// SemanticCachePluginConfig represents configuration for semantic-cache plugin
+type SemanticCachePluginConfig struct {
+ Enabled bool `json:"enabled" yaml:"enabled"`
+ SimilarityThreshold *float32 `json:"similarity_threshold,omitempty" yaml:"similarity_threshold,omitempty"`
+}
+
+// JailbreakPluginConfig represents configuration for jailbreak plugin
+type JailbreakPluginConfig struct {
+ Enabled bool `json:"enabled" yaml:"enabled"`
+ Threshold *float32 `json:"threshold,omitempty" yaml:"threshold,omitempty"`
+}
+
+// PIIPluginConfig represents configuration for pii plugin
+type PIIPluginConfig struct {
+ Enabled bool `json:"enabled" yaml:"enabled"`
+ Threshold *float32 `json:"threshold,omitempty" yaml:"threshold,omitempty"`
+
+ // PII Policy configuration
+ // When Enabled is true, all PII types are blocked by default unless listed in PIITypesAllowed
+ // When Enabled is false, PII detection is skipped entirely
+ PIITypesAllowed []string `json:"pii_types_allowed,omitempty" yaml:"pii_types_allowed,omitempty"`
+}
+
+// SystemPromptPluginConfig represents configuration for system_prompt plugin
+type SystemPromptPluginConfig struct {
+ Enabled *bool `json:"enabled,omitempty" yaml:"enabled,omitempty"`
+ SystemPrompt string `json:"system_prompt,omitempty" yaml:"system_prompt,omitempty"`
+ Mode string `json:"mode,omitempty" yaml:"mode,omitempty"` // "replace" or "insert"
+}
+
+// HeaderMutationPluginConfig represents configuration for header_mutation plugin
+type HeaderMutationPluginConfig struct {
+ Add []HeaderPair `json:"add,omitempty" yaml:"add,omitempty"`
+ Update []HeaderPair `json:"update,omitempty" yaml:"update,omitempty"`
+ Delete []string `json:"delete,omitempty" yaml:"delete,omitempty"`
+}
+
+// HeaderPair represents a header name-value pair
+type HeaderPair struct {
+ Name string `json:"name" yaml:"name"`
+ Value string `json:"value" yaml:"value"`
+}
+
+// Helper methods for Decision to access plugin configurations
+
+// GetPluginConfig returns the configuration for a specific plugin type
+// Returns nil if the plugin is not found
+func (d *Decision) GetPluginConfig(pluginType string) interface{} {
+ for _, plugin := range d.Plugins {
+ if plugin.Type == pluginType {
+ return plugin.Configuration
+ }
+ }
+ return nil
+}
+
+// unmarshalPluginConfig unmarshals plugin configuration to a target struct
+// Handles both map[string]interface{} (from YAML) and []byte (from Kubernetes RawExtension)
+func unmarshalPluginConfig(config interface{}, target interface{}) error {
+ if config == nil {
+ return fmt.Errorf("plugin configuration is nil")
+ }
+
+ switch v := config.(type) {
+ case map[string]interface{}:
+ // From YAML file - convert via JSON
+ data, err := json.Marshal(v)
+ if err != nil {
+ return fmt.Errorf("failed to marshal config: %w", err)
+ }
+ return json.Unmarshal(data, target)
+ case map[interface{}]interface{}:
+ // From YAML file with interface{} keys - convert to map[string]interface{} first
+ converted := convertMapToStringKeys(v)
+ data, err := json.Marshal(converted)
+ if err != nil {
+ return fmt.Errorf("failed to marshal config: %w", err)
+ }
+ return json.Unmarshal(data, target)
+ case []byte:
+ // From Kubernetes RawExtension - direct unmarshal
+ return json.Unmarshal(v, target)
+ default:
+ return fmt.Errorf("unsupported configuration type: %T", config)
+ }
+}
+
+// convertMapToStringKeys recursively converts map[interface{}]interface{} to map[string]interface{}
+func convertMapToStringKeys(m map[interface{}]interface{}) map[string]interface{} {
+ result := make(map[string]interface{})
+ for k, v := range m {
+ // Convert key to string
+ key, ok := k.(string)
+ if !ok {
+ key = fmt.Sprintf("%v", k)
+ }
+
+ // Recursively convert nested maps
+ switch val := v.(type) {
+ case map[interface{}]interface{}:
+ result[key] = convertMapToStringKeys(val)
+ case []interface{}:
+ result[key] = convertSliceValues(val)
+ default:
+ result[key] = v
+ }
+ }
+ return result
+}
+
+// convertSliceValues recursively converts slice elements that are maps
+func convertSliceValues(s []interface{}) []interface{} {
+ result := make([]interface{}, len(s))
+ for i, v := range s {
+ switch val := v.(type) {
+ case map[interface{}]interface{}:
+ result[i] = convertMapToStringKeys(val)
+ case []interface{}:
+ result[i] = convertSliceValues(val)
+ default:
+ result[i] = v
+ }
+ }
+ return result
+}
+
+// GetSemanticCacheConfig returns the semantic-cache plugin configuration
+func (d *Decision) GetSemanticCacheConfig() *SemanticCachePluginConfig {
+ config := d.GetPluginConfig("semantic-cache")
+ if config == nil {
+ return nil
+ }
+
+ result := &SemanticCachePluginConfig{}
+ if err := unmarshalPluginConfig(config, result); err != nil {
+ logging.Errorf("Failed to unmarshal semantic-cache config: %v", err)
+ return nil
+ }
+ return result
+}
+
+// GetJailbreakConfig returns the jailbreak plugin configuration
+func (d *Decision) GetJailbreakConfig() *JailbreakPluginConfig {
+ config := d.GetPluginConfig("jailbreak")
+ if config == nil {
+ return nil
+ }
+
+ result := &JailbreakPluginConfig{}
+ if err := unmarshalPluginConfig(config, result); err != nil {
+ logging.Errorf("Failed to unmarshal jailbreak config: %v", err)
+ return nil
+ }
+ return result
+}
+
+// GetPIIConfig returns the pii plugin configuration
+func (d *Decision) GetPIIConfig() *PIIPluginConfig {
+ config := d.GetPluginConfig("pii")
+ if config == nil {
+ return nil
+ }
+
+ result := &PIIPluginConfig{}
+ if err := unmarshalPluginConfig(config, result); err != nil {
+ logging.Errorf("Failed to unmarshal pii config: %v", err)
+ return nil
+ }
+ return result
+}
+
+// GetSystemPromptConfig returns the system_prompt plugin configuration
+func (d *Decision) GetSystemPromptConfig() *SystemPromptPluginConfig {
+ config := d.GetPluginConfig("system_prompt")
+ if config == nil {
+ return nil
+ }
+
+ result := &SystemPromptPluginConfig{}
+ if err := unmarshalPluginConfig(config, result); err != nil {
+ logging.Errorf("Failed to unmarshal system_prompt config: %v", err)
+ return nil
+ }
+ return result
+}
+
+// GetHeaderMutationConfig returns the header_mutation plugin configuration
+func (d *Decision) GetHeaderMutationConfig() *HeaderMutationPluginConfig {
+ config := d.GetPluginConfig("header_mutation")
+ if config == nil {
+ return nil
+ }
+
+ result := &HeaderMutationPluginConfig{}
+ if err := unmarshalPluginConfig(config, result); err != nil {
+ logging.Errorf("Failed to unmarshal header_mutation config: %v", err)
+ return nil
+ }
+ return result
+}
+
+// RuleCombination defines how to combine multiple rule conditions with AND/OR operators
+type RuleCombination struct {
+ // Operator specifies how to combine conditions: "AND" or "OR"
+ Operator string `yaml:"operator"`
+
+ // Conditions is the list of rule references to evaluate
+ Conditions []RuleCondition `yaml:"conditions"`
+}
+
+// RuleCondition references a specific rule by type and name
+type RuleCondition struct {
+ // Type specifies the rule type: "keyword", "embedding", or "domain"
+ Type string `yaml:"type"`
+
+ // Name is the name of the rule to reference
+ Name string `yaml:"name"`
+}
+
// ModelReasoningControl represents reasoning mode control on model level
type ModelReasoningControl struct {
UseReasoning *bool `yaml:"use_reasoning"` // Pointer to detect missing field
diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go
index 81b2b1c2a..889fe86fc 100644
--- a/src/semantic-router/pkg/config/config_test.go
+++ b/src/semantic-router/pkg/config/config_test.go
@@ -9,7 +9,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
- "github.com/samber/lo"
"gopkg.in/yaml.v3"
)
@@ -63,13 +62,19 @@ classifier:
categories:
- name: "general"
description: "General purpose tasks"
- model_scores:
+
+decisions:
+ - name: "general"
+ description: "General purpose decision"
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: general_keywords
+ modelRefs:
- model: "model-a"
- score: 0.9
use_reasoning: true
- - model: "model-b"
- score: 0.8
- use_reasoning: false
default_model: "model-b"
@@ -99,13 +104,8 @@ vllm_endpoints:
model_config:
"model-a":
- pii_policy:
- allow_by_default: false
- pii_types_allowed: ["NO_PII", "ORGANIZATION"]
preferred_endpoints: ["endpoint1"]
"model-b":
- pii_policy:
- allow_by_default: true
preferred_endpoints: ["endpoint1", "endpoint2"]
tools:
@@ -136,7 +136,12 @@ tools:
// Verify categories
Expect(cfg.Categories).To(HaveLen(1))
Expect(cfg.Categories[0].Name).To(Equal("general"))
- Expect(cfg.Categories[0].ModelScores).To(HaveLen(2))
+
+ // Verify decisions
+ Expect(cfg.Decisions).To(HaveLen(1))
+ Expect(cfg.Decisions[0].Name).To(Equal("general"))
+ Expect(cfg.Decisions[0].ModelRefs).To(HaveLen(1))
+ Expect(cfg.Decisions[0].ModelRefs[0].Model).To(Equal("model-a"))
// Verify default model
Expect(cfg.DefaultModel).To(Equal("model-b"))
@@ -158,8 +163,7 @@ tools:
// Verify model config
Expect(cfg.ModelConfig).To(HaveKey("model-a"))
- Expect(cfg.ModelConfig["model-a"].PIIPolicy.AllowByDefault).To(BeFalse())
- Expect(cfg.ModelConfig["model-a"].PIIPolicy.PIITypes).To(ContainElements("NO_PII", "ORGANIZATION"))
+ Expect(cfg.ModelConfig["model-a"].PreferredEndpoints).To(ContainElement("endpoint1"))
// Verify tools config
Expect(cfg.Tools.Enabled).To(BeTrue())
@@ -324,22 +328,29 @@ semantic_cache:
})
})
- Describe("GetModelForCategoryIndex", func() {
+ Describe("GetModelForDecisionIndex", func() {
BeforeEach(func() {
configContent := `
-categories:
- - name: "category1"
- model_scores:
+decisions:
+ - name: "decision1"
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: rule1
+ modelRefs:
- model: "model1"
- score: 0.9
use_reasoning: true
- - model: "model2"
- score: 0.8
- use_reasoning: false
- - name: "category2"
- model_scores:
+ - name: "decision2"
+ priority: 90
+ rules:
+ operator: OR
+ conditions:
+ - type: embedding
+ name: rule2
+ modelRefs:
- model: "model3"
- score: 0.95
use_reasoning: true
default_model: "default-model"
`
@@ -347,25 +358,25 @@ default_model: "default-model"
Expect(err).NotTo(HaveOccurred())
})
- Context("with valid category index", func() {
- It("should return the best model for the category", func() {
+ Context("with valid decision index", func() {
+ It("should return the best model for the decision", func() {
cfg, err := Load(configFile)
Expect(err).NotTo(HaveOccurred())
- model := cfg.GetModelForCategoryIndex(0)
+ model := cfg.GetModelForDecisionIndex(0)
Expect(model).To(Equal("model1"))
- model = cfg.GetModelForCategoryIndex(1)
+ model = cfg.GetModelForDecisionIndex(1)
Expect(model).To(Equal("model3"))
})
})
- Context("with invalid category index", func() {
+ Context("with invalid decision index", func() {
It("should return the default model for negative index", func() {
cfg, err := Load(configFile)
Expect(err).NotTo(HaveOccurred())
- model := cfg.GetModelForCategoryIndex(-1)
+ model := cfg.GetModelForDecisionIndex(-1)
Expect(model).To(Equal("default-model"))
})
@@ -373,20 +384,23 @@ default_model: "default-model"
cfg, err := Load(configFile)
Expect(err).NotTo(HaveOccurred())
- model := cfg.GetModelForCategoryIndex(10)
+ model := cfg.GetModelForDecisionIndex(10)
Expect(model).To(Equal("default-model"))
})
})
- Context("with category having no models", func() {
+ Context("with decision having no models", func() {
BeforeEach(func() {
configContent := `
-categories:
- - name: "empty_category"
- model_scores:
- - model: "fallback-model"
- score: 0.5
- use_reasoning: false
+decisions:
+ - name: "empty_decision"
+ priority: 50
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: rule1
+ modelRefs: []
default_model: "fallback-model"
`
err := os.WriteFile(configFile, []byte(configContent), 0o644)
@@ -394,112 +408,10 @@ default_model: "fallback-model"
})
It("should return the default model", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- model := cfg.GetModelForCategoryIndex(0)
- Expect(model).To(Equal("fallback-model"))
- })
- })
- })
-
- Describe("PII Policy Functions", func() {
- BeforeEach(func() {
- configContent := `
-model_config:
- "strict-model":
- pii_policy:
- allow_by_default: false
- pii_types_allowed: ["NO_PII", "ORGANIZATION"]
- "permissive-model":
- pii_policy:
- allow_by_default: true
- "unconfigured-model":
- pii_policy:
- allow_by_default: true
-`
- err := os.WriteFile(configFile, []byte(configContent), 0o644)
- Expect(err).NotTo(HaveOccurred())
- })
-
- Describe("GetModelPIIPolicy", func() {
- It("should return configured PII policy for existing model", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- policy := cfg.GetModelPIIPolicy("strict-model")
- Expect(policy.AllowByDefault).To(BeFalse())
- Expect(policy.PIITypes).To(ContainElements("NO_PII", "ORGANIZATION"))
-
- policy = cfg.GetModelPIIPolicy("permissive-model")
- Expect(policy.AllowByDefault).To(BeTrue())
- })
-
- It("should return default allow-all policy for non-existent model", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- policy := cfg.GetModelPIIPolicy("non-existent-model")
- Expect(policy.AllowByDefault).To(BeTrue())
- Expect(policy.PIITypes).To(BeEmpty())
- })
- })
-
- Describe("IsModelAllowedForPIIType", func() {
- It("should allow all PII types when allow_by_default is true", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- Expect(cfg.IsModelAllowedForPIIType("permissive-model", PIITypePerson)).To(BeTrue())
- Expect(cfg.IsModelAllowedForPIIType("permissive-model", PIITypeCreditCard)).To(BeTrue())
- Expect(cfg.IsModelAllowedForPIIType("permissive-model", PIITypeEmailAddress)).To(BeTrue())
- })
-
- It("should only allow explicitly permitted PII types when allow_by_default is false", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- // Should allow explicitly listed PII types
- Expect(cfg.IsModelAllowedForPIIType("strict-model", PIITypeNoPII)).To(BeTrue())
- Expect(cfg.IsModelAllowedForPIIType("strict-model", PIITypeOrganization)).To(BeTrue())
-
- // Should deny non-listed PII types
- Expect(cfg.IsModelAllowedForPIIType("strict-model", PIITypePerson)).To(BeFalse())
- Expect(cfg.IsModelAllowedForPIIType("strict-model", PIITypeCreditCard)).To(BeFalse())
- Expect(cfg.IsModelAllowedForPIIType("strict-model", PIITypeEmailAddress)).To(BeFalse())
- })
-
- It("should handle unknown models with default allow-all policy", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- Expect(cfg.IsModelAllowedForPIIType("unknown-model", PIITypePerson)).To(BeTrue())
- Expect(cfg.IsModelAllowedForPIIType("unknown-model", PIITypeCreditCard)).To(BeTrue())
- })
- })
-
- Describe("IsModelAllowedForPIITypes", func() {
- It("should return true when all PII types are allowed", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- piiTypes := []string{PIITypeNoPII, PIITypeOrganization}
- Expect(cfg.IsModelAllowedForPIITypes("strict-model", piiTypes)).To(BeTrue())
- })
-
- It("should return false when any PII type is not allowed", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- piiTypes := []string{PIITypeNoPII, PIITypePerson}
- Expect(cfg.IsModelAllowedForPIITypes("strict-model", piiTypes)).To(BeFalse())
- })
-
- It("should return true for empty PII types list", func() {
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- Expect(cfg.IsModelAllowedForPIITypes("strict-model", []string{})).To(BeTrue())
+ // This should fail validation since decisions must have at least one model
+ _, err := Load(configFile)
+ Expect(err).To(HaveOccurred())
+ Expect(err.Error()).To(ContainSubstring("has no modelRefs defined"))
})
})
})
@@ -739,15 +651,14 @@ semantic_cache:
configContent := `
model_config:
"large-model":
- pii_policy:
- allow_by_default: true
+ preferred_endpoints: ["endpoint1"]
`
err := os.WriteFile(configFile, []byte(configContent), 0o644)
Expect(err).NotTo(HaveOccurred())
cfg, err := Load(configFile)
Expect(err).NotTo(HaveOccurred())
- Expect(cfg.ModelConfig["large-model"].PIIPolicy.AllowByDefault).To(BeTrue())
+ Expect(cfg.ModelConfig["large-model"].PreferredEndpoints).To(ContainElement("endpoint1"))
})
It("should handle special string values", func() {
@@ -957,40 +868,6 @@ default_model: "model-b"
Expect(err).NotTo(HaveOccurred())
})
- It("should fail validation when a category model has no endpoints", func() {
- // Add a model to categories that doesn't have preferred_endpoints configured
- configContent := `
-vllm_endpoints:
- - name: "endpoint1"
- address: "127.0.0.1"
- port: 8000
- weight: 1
-
-model_config:
- "existing-model":
- preferred_endpoints: ["endpoint1"]
-
-categories:
- - name: "test"
- model_scores:
- - model: "missing-model"
- score: 0.9
- use_reasoning: true
-
-default_model: "existing-model"
-`
- err := os.WriteFile(configFile, []byte(configContent), 0o644)
- Expect(err).NotTo(HaveOccurred())
-
- cfg, err := Load(configFile)
- Expect(err).NotTo(HaveOccurred())
-
- err = cfg.ValidateEndpoints()
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("missing-model"))
- Expect(err.Error()).To(ContainSubstring("no available endpoints"))
- })
-
It("should fail validation when default model has no endpoints", func() {
configContent := `
vllm_endpoints:
@@ -1799,171 +1676,12 @@ default_model: "test-model"
})
})
- Describe("Category-Level Cache Settings", func() {
- Context("with category-specific cache configuration", func() {
- It("should use category-specific cache enabled setting", func() {
- yamlContent := `
-bert_model:
- model_id: "test-model"
- threshold: 0.7
-
-semantic_cache:
- enabled: true
- similarity_threshold: 0.8
-
-categories:
- - name: health
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
- - name: general
- semantic_cache_enabled: false
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
- - name: other
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
-`
- var cfg RouterConfig
- err := yaml.Unmarshal([]byte(yamlContent), &cfg)
- Expect(err).NotTo(HaveOccurred())
-
- // Test category-specific enabled settings
- Expect(cfg.IsCacheEnabledForCategory("health")).To(BeTrue())
- Expect(cfg.IsCacheEnabledForCategory("general")).To(BeFalse())
- // "other" should fall back to global setting
- Expect(cfg.IsCacheEnabledForCategory("other")).To(BeTrue())
- // Unknown category should also fall back to global
- Expect(cfg.IsCacheEnabledForCategory("unknown")).To(BeTrue())
- })
-
- It("should use category-specific similarity thresholds", func() {
- yamlContent := `
-bert_model:
- model_id: "test-model"
- threshold: 0.7
-
-semantic_cache:
- enabled: true
- similarity_threshold: 0.8
-
-categories:
- - name: health
- semantic_cache_similarity_threshold: 0.95
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
- - name: psychology
- semantic_cache_similarity_threshold: 0.92
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
- - name: other
- semantic_cache_similarity_threshold: 0.75
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
- - name: general
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
-`
- var cfg RouterConfig
- err := yaml.Unmarshal([]byte(yamlContent), &cfg)
- Expect(err).NotTo(HaveOccurred())
-
- // Test category-specific thresholds
- Expect(cfg.GetCacheSimilarityThresholdForCategory("health")).To(Equal(float32(0.95)))
- Expect(cfg.GetCacheSimilarityThresholdForCategory("psychology")).To(Equal(float32(0.92)))
- Expect(cfg.GetCacheSimilarityThresholdForCategory("other")).To(Equal(float32(0.75)))
- // "general" should fall back to global semantic_cache threshold
- Expect(cfg.GetCacheSimilarityThresholdForCategory("general")).To(Equal(float32(0.8)))
- // Unknown category should also fall back
- Expect(cfg.GetCacheSimilarityThresholdForCategory("unknown")).To(Equal(float32(0.8)))
- })
-
- It("should fall back to bert threshold when semantic_cache threshold is not set", func() {
- yamlContent := `
-bert_model:
- model_id: "test-model"
- threshold: 0.6
-
-semantic_cache:
- enabled: true
-
-categories:
- - name: test
- model_scores:
- - model: test-model
- score: 1.0
- use_reasoning: false
-`
- var cfg RouterConfig
- err := yaml.Unmarshal([]byte(yamlContent), &cfg)
- Expect(err).NotTo(HaveOccurred())
-
- // Should fall back to bert_model.threshold
- Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.6)))
- Expect(cfg.GetCacheSimilarityThreshold()).To(Equal(float32(0.6)))
- })
-
- It("should handle nil pointers for optional cache settings", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{
- {
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- }
-
- cfg := &RouterConfig{
- SemanticCache: SemanticCache{
- Enabled: true,
- SimilarityThreshold: lo.ToPtr(float32(0.8)),
- },
- InlineModels: InlineModels{
- BertModel: BertModel{
- Threshold: 0.7,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- // Nil values should use defaults
- Expect(cfg.IsCacheEnabledForCategory("test")).To(BeTrue())
- Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.8)))
- })
- })
- })
-
- Describe("IsJailbreakEnabledForCategory", func() {
+ Describe("IsJailbreakEnabledForDecision", func() {
Context("when global jailbreak is enabled", func() {
- It("should return true for category without explicit setting", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
+ It("should return true for decision without explicit setting", func() {
+ decision := Decision{
+ Name: "test",
+ ModelRefs: []ModelRef{{Model: "test"}},
}
cfg := &RouterConfig{
@@ -1973,51 +1691,25 @@ categories:
},
},
IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
+ Decisions: []Decision{decision},
},
}
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeTrue())
+ Expect(cfg.IsJailbreakEnabledForDecision("test")).To(BeTrue())
})
- It("should return false when category explicitly disables jailbreak", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakEnabled: lo.ToPtr(false),
- },
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Enabled: true,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeFalse())
- })
-
- It("should return true when category explicitly enables jailbreak", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakEnabled: lo.ToPtr(true),
+ It("should return false when decision explicitly disables jailbreak", func() {
+ decision := Decision{
+ Name: "test",
+ ModelRefs: []ModelRef{{Model: "test"}},
+ Plugins: []DecisionPlugin{
+ {
+ Type: "jailbreak",
+ Configuration: map[string]interface{}{
+ "enabled": false,
+ },
},
},
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
}
cfg := &RouterConfig{
@@ -2027,94 +1719,27 @@ categories:
},
},
IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeTrue())
- })
- })
-
- Context("when global jailbreak is disabled", func() {
- It("should return false for category without explicit setting", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Enabled: false,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
+ Decisions: []Decision{decision},
},
}
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeFalse())
+ Expect(cfg.IsJailbreakEnabledForDecision("test")).To(BeFalse())
})
- It("should return true when category explicitly enables jailbreak", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakEnabled: lo.ToPtr(true),
- },
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Enabled: false,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeTrue())
- })
-
- It("should return false when category explicitly disables jailbreak", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakEnabled: lo.ToPtr(false),
+ It("should return true when decision explicitly enables jailbreak", func() {
+ decision := Decision{
+ Name: "test",
+ ModelRefs: []ModelRef{{Model: "test"}},
+ Plugins: []DecisionPlugin{
+ {
+ Type: "jailbreak",
+ Configuration: map[string]interface{}{
+ "enabled": true,
+ },
},
},
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
}
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Enabled: false,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsJailbreakEnabledForCategory("test")).To(BeFalse())
- })
- })
-
- Context("when category does not exist", func() {
- It("should fall back to global setting", func() {
cfg := &RouterConfig{
InlineModels: InlineModels{
PromptGuard: PromptGuardConfig{
@@ -2122,419 +1747,11 @@ categories:
},
},
IntelligentRouting: IntelligentRouting{
- Categories: []Category{},
- },
- }
-
- Expect(cfg.IsJailbreakEnabledForCategory("nonexistent")).To(BeTrue())
- })
- })
- })
-
- Describe("GetJailbreakThresholdForCategory", func() {
- Context("when global threshold is set", func() {
- It("should return global threshold for category without explicit setting", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Threshold: 0.7,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetJailbreakThresholdForCategory("test")).To(Equal(float32(0.7)))
- })
-
- It("should return category-specific threshold when set", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakThreshold: lo.ToPtr(float32(0.9)),
- },
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Threshold: 0.7,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetJailbreakThresholdForCategory("test")).To(Equal(float32(0.9)))
- })
-
- It("should allow lower threshold override", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakThreshold: lo.ToPtr(float32(0.5)),
- },
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Threshold: 0.7,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetJailbreakThresholdForCategory("test")).To(Equal(float32(0.5)))
- })
-
- It("should allow higher threshold override", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- JailbreakPolicy: JailbreakPolicy{
- JailbreakThreshold: lo.ToPtr(float32(0.95)),
- },
- },
- ModelScores: []ModelScore{{Model: "test", Score: 1.0}},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Threshold: 0.7,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetJailbreakThresholdForCategory("test")).To(Equal(float32(0.95)))
- })
- })
-
- Context("when category does not exist", func() {
- It("should fall back to global threshold", func() {
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- PromptGuard: PromptGuardConfig{
- Threshold: 0.8,
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{},
- },
- }
-
- Expect(cfg.GetJailbreakThresholdForCategory("nonexistent")).To(Equal(float32(0.8)))
- })
- })
- })
-
- Describe("GetPIIThresholdForCategory", func() {
- Context("when global threshold is set", func() {
- It("should return global threshold for category without explicit setting", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- Threshold: 0.7,
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.7)))
- })
-
- It("should return category-specific threshold when set", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- PIIDetectionPolicy: PIIDetectionPolicy{
- PIIThreshold: lo.ToPtr(float32(0.9)),
- },
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- Threshold: 0.7,
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.9)))
- })
-
- It("should allow lower threshold override", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- PIIDetectionPolicy: PIIDetectionPolicy{
- PIIThreshold: lo.ToPtr(float32(0.5)),
- },
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- Threshold: 0.7,
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.5)))
- })
-
- It("should allow higher threshold override", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- PIIDetectionPolicy: PIIDetectionPolicy{
- PIIThreshold: lo.ToPtr(float32(0.95)),
- },
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- Threshold: 0.7,
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.95)))
- })
- })
-
- Context("when category does not exist", func() {
- It("should fall back to global threshold", func() {
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- Threshold: 0.8,
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{},
- },
- }
-
- Expect(cfg.GetPIIThresholdForCategory("nonexistent")).To(Equal(float32(0.8)))
- })
- })
- })
-
- Describe("IsPIIEnabledForCategory", func() {
- Context("when global PII is enabled", func() {
- It("should return true for category without explicit setting", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- ModelID: "test-model",
- PIIMappingPath: "/path/to/mapping.json",
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
+ Decisions: []Decision{decision},
},
}
- Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
- })
-
- It("should return category-specific setting when set to false", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- PIIDetectionPolicy: PIIDetectionPolicy{
- PIIEnabled: lo.ToPtr(false),
- },
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- ModelID: "test-model",
- PIIMappingPath: "/path/to/mapping.json",
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsPIIEnabledForCategory("test")).To(BeFalse())
- })
-
- It("should return category-specific setting when set to true", func() {
- category := Category{
- CategoryMetadata: CategoryMetadata{
- Name: "test",
- },
- DomainAwarePolicies: DomainAwarePolicies{
- PIIDetectionPolicy: PIIDetectionPolicy{
- PIIEnabled: lo.ToPtr(true),
- },
- },
- ModelScores: []ModelScore{{
- Model: "test",
- Score: 1.0,
- ModelReasoningControl: ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- }},
- }
-
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- ModelID: "",
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{category},
- },
- }
-
- Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
- })
- })
-
- Context("when category does not exist", func() {
- It("should fall back to global setting", func() {
- cfg := &RouterConfig{
- InlineModels: InlineModels{
- Classifier: Classifier{
- PIIModel: PIIModel{
- ModelID: "test-model",
- PIIMappingPath: "/path/to/mapping.json",
- },
- },
- },
- IntelligentRouting: IntelligentRouting{
- Categories: []Category{},
- },
- }
-
- Expect(cfg.IsPIIEnabledForCategory("nonexistent")).To(BeTrue())
+ Expect(cfg.IsJailbreakEnabledForDecision("test")).To(BeTrue())
})
})
})
@@ -2546,21 +1763,9 @@ var _ = Describe("MMLU categories in config YAML", func() {
categories:
- name: "tech"
mmlu_categories: ["computer science", "engineering"]
- model_scores:
- - model: "phi4"
- score: 0.9
- use_reasoning: false
- name: "finance"
mmlu_categories: ["economics"]
- model_scores:
- - model: "gemma3:27b"
- score: 0.8
- use_reasoning: true
- name: "politics"
- model_scores:
- - model: "gemma3:27b"
- score: 0.6
- use_reasoning: false
`
var cfg RouterConfig
@@ -2570,7 +1775,6 @@ categories:
Expect(cfg.Categories[0].Name).To(Equal("tech"))
Expect(cfg.Categories[0].MMLUCategories).To(ConsistOf("computer science", "engineering"))
- Expect(cfg.Categories[0].ModelScores).ToNot(BeEmpty())
Expect(cfg.Categories[1].Name).To(Equal("finance"))
Expect(cfg.Categories[1].MMLUCategories).To(ConsistOf("economics"))
diff --git a/src/semantic-router/pkg/config/helper.go b/src/semantic-router/pkg/config/helper.go
index e134d3dfe..224baa601 100644
--- a/src/semantic-router/pkg/config/helper.go
+++ b/src/semantic-router/pkg/config/helper.go
@@ -59,18 +59,18 @@ func (c *RouterConfig) GetCategoryDescriptions() []string {
return descriptions
}
-// GetModelForCategoryIndex returns the best LLM model name for the category at the given index
-func (c *RouterConfig) GetModelForCategoryIndex(index int) string {
- if index < 0 || index >= len(c.Categories) {
+// GetModelForDecisionIndex returns the best LLM model name for the decision at the given index
+func (c *RouterConfig) GetModelForDecisionIndex(index int) string {
+ if index < 0 || index >= len(c.Decisions) {
return c.DefaultModel
}
- category := c.Categories[index]
- if len(category.ModelScores) > 0 {
- return category.ModelScores[0].Model
+ decision := c.Decisions[index]
+ if len(decision.ModelRefs) > 0 {
+ return decision.ModelRefs[0].Model
}
- // Fall back to default model if category has no models
+ // Fall back to default model if decision has no models
return c.DefaultModel
}
@@ -90,22 +90,31 @@ func (c *RouterConfig) GetModelPricing(modelName string) (promptPer1M float64, c
return 0, 0, "", false
}
-// GetModelPIIPolicy returns the PII policy for a given model
-// If the model is not found in the config, returns a default policy that allows all PII
-func (c *RouterConfig) GetModelPIIPolicy(modelName string) PIIPolicy {
- if modelConfig, ok := c.ModelConfig[modelName]; ok {
- return modelConfig.PIIPolicy
+// GetDecisionPIIPolicy returns the PII policy for a given decision
+// If the decision doesn't have a PII plugin or policy config, returns a default policy that allows all PII
+func (d *Decision) GetDecisionPIIPolicy() PIIPolicy {
+ piiConfig := d.GetPIIConfig()
+ if piiConfig == nil {
+ // Default policy allows all PII (no PII plugin configured)
+ return PIIPolicy{
+ AllowByDefault: true,
+ PIITypes: []string{},
+ }
}
- // Default policy allows all PII
+
+ // When PII plugin is enabled, default behavior is to block all PII (AllowByDefault: false)
+ // unless specific types are listed in PIITypesAllowed
+ allowByDefault := !piiConfig.Enabled
+
return PIIPolicy{
- AllowByDefault: true,
- PIITypes: []string{},
+ AllowByDefault: allowByDefault,
+ PIITypes: piiConfig.PIITypesAllowed,
}
}
-// IsModelAllowedForPIIType checks if a model is allowed to process a specific PII type
-func (c *RouterConfig) IsModelAllowedForPIIType(modelName string, piiType string) bool {
- policy := c.GetModelPIIPolicy(modelName)
+// IsDecisionAllowedForPIIType checks if a decision is allowed to process a specific PII type
+func (d *Decision) IsDecisionAllowedForPIIType(piiType string) bool {
+ policy := d.GetDecisionPIIPolicy()
// If allow_by_default is true, all PII types are allowed unless explicitly denied
if policy.AllowByDefault {
@@ -116,10 +125,10 @@ func (c *RouterConfig) IsModelAllowedForPIIType(modelName string, piiType string
return slices.Contains(policy.PIITypes, piiType)
}
-// IsModelAllowedForPIITypes checks if a model is allowed to process any of the given PII types
-func (c *RouterConfig) IsModelAllowedForPIITypes(modelName string, piiTypes []string) bool {
+// IsDecisionAllowedForPIITypes checks if a decision is allowed to process any of the given PII types
+func (d *Decision) IsDecisionAllowedForPIITypes(piiTypes []string) bool {
for _, piiType := range piiTypes {
- if !c.IsModelAllowedForPIIType(modelName, piiType) {
+ if !d.IsDecisionAllowedForPIIType(piiType) {
return false
}
}
@@ -238,40 +247,40 @@ func (c *RouterConfig) SelectBestEndpointAddressForModel(modelName string) (stri
return fmt.Sprintf("%s:%d", bestEndpoint.Address, bestEndpoint.Port), true
}
-// GetModelReasoningForCategory returns whether a specific model supports reasoning in a given category
-func (c *RouterConfig) GetModelReasoningForCategory(categoryName string, modelName string) bool {
- for _, category := range c.Categories {
- if category.Name == categoryName {
- for _, modelScore := range category.ModelScores {
- if modelScore.Model == modelName {
- return modelScore.UseReasoning != nil && *modelScore.UseReasoning
+// GetModelReasoningForDecision returns whether a specific model supports reasoning in a given decision
+func (c *RouterConfig) GetModelReasoningForDecision(decisionName string, modelName string) bool {
+ for _, decision := range c.Decisions {
+ if decision.Name == decisionName {
+ for _, modelRef := range decision.ModelRefs {
+ if modelRef.Model == modelName {
+ return modelRef.UseReasoning != nil && *modelRef.UseReasoning
}
}
}
}
- return false // Default to false if category or model not found
+ return false // Default to false if decision or model not found
}
-// GetBestModelForCategory returns the best scoring model for a given category
-func (c *RouterConfig) GetBestModelForCategory(categoryName string) (string, bool) {
- for _, category := range c.Categories {
- if category.Name == categoryName {
- if len(category.ModelScores) > 0 {
- useReasoning := category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning
- return category.ModelScores[0].Model, useReasoning
+// GetBestModelForDecision returns the best model for a given decision (first model in ModelRefs)
+func (c *RouterConfig) GetBestModelForDecision(decisionName string) (string, bool) {
+ for _, decision := range c.Decisions {
+ if decision.Name == decisionName {
+ if len(decision.ModelRefs) > 0 {
+ useReasoning := decision.ModelRefs[0].UseReasoning != nil && *decision.ModelRefs[0].UseReasoning
+ return decision.ModelRefs[0].Model, useReasoning
}
}
}
- return "", false // Return empty string and false if category not found or has no models
+ return "", false // Return empty string and false if decision not found or has no models
}
// ValidateEndpoints validates that all configured models have at least one endpoint
func (c *RouterConfig) ValidateEndpoints() error {
- // Get all models from categories
+ // Get all models from decisions
allCategoryModels := make(map[string]bool)
- for _, category := range c.Categories {
- for _, modelScore := range category.ModelScores {
- allCategoryModels[modelScore.Model] = true
+ for _, decision := range c.Decisions {
+ for _, modelRef := range decision.ModelRefs {
+ allCategoryModels[modelRef.Model] = true
}
}
@@ -291,22 +300,27 @@ func (c *RouterConfig) ValidateEndpoints() error {
return nil
}
-// IsSystemPromptEnabled returns whether system prompt injection is enabled for a category
-func (c *Category) IsSystemPromptEnabled() bool {
- // If SystemPromptEnabled is explicitly set, use that value
- if c.SystemPromptEnabled != nil {
- return *c.SystemPromptEnabled
+// IsSystemPromptEnabled returns whether system prompt injection is enabled for a decision
+func (d *Decision) IsSystemPromptEnabled() bool {
+ config := d.GetSystemPromptConfig()
+ if config == nil {
+ return false
+ }
+ // If Enabled is explicitly set, use that value
+ if config.Enabled != nil {
+ return *config.Enabled
}
// Default to true if SystemPrompt is not empty
- return c.SystemPrompt != ""
+ return config.SystemPrompt != ""
}
// GetSystemPromptMode returns the system prompt injection mode, defaulting to "replace"
-func (c *Category) GetSystemPromptMode() string {
- if c.SystemPromptMode == "" {
+func (d *Decision) GetSystemPromptMode() string {
+ config := d.GetSystemPromptConfig()
+ if config == nil || config.Mode == "" {
return "replace" // Default mode
}
- return c.SystemPromptMode
+ return config.Mode
}
// GetCategoryByName returns a category by name
@@ -319,67 +333,89 @@ func (c *RouterConfig) GetCategoryByName(name string) *Category {
return nil
}
-// IsCacheEnabledForCategory returns whether semantic caching is enabled for a specific category
-// If the category has an explicit setting, it takes precedence; otherwise, uses global setting
-func (c *RouterConfig) IsCacheEnabledForCategory(categoryName string) bool {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.SemanticCacheEnabled != nil {
- return *category.SemanticCacheEnabled
+// GetDecisionByName returns a decision by name
+func (c *RouterConfig) GetDecisionByName(name string) *Decision {
+ for i := range c.Decisions {
+ if c.Decisions[i].Name == name {
+ return &c.Decisions[i]
+ }
+ }
+ return nil
+}
+
+// IsCacheEnabledForDecision returns whether semantic caching is enabled for a specific decision
+func (c *RouterConfig) IsCacheEnabledForDecision(decisionName string) bool {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetSemanticCacheConfig()
+ if config != nil {
+ return config.Enabled
+ }
}
// Fall back to global setting
return c.Enabled
}
-// GetCacheSimilarityThresholdForCategory returns the effective cache similarity threshold for a category
-// Priority: category-specific > global semantic_cache > bert_model threshold
-func (c *RouterConfig) GetCacheSimilarityThresholdForCategory(categoryName string) float32 {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.SemanticCacheSimilarityThreshold != nil {
- return *category.SemanticCacheSimilarityThreshold
+// GetCacheSimilarityThresholdForDecision returns the effective cache similarity threshold for a decision
+func (c *RouterConfig) GetCacheSimilarityThresholdForDecision(decisionName string) float32 {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetSemanticCacheConfig()
+ if config != nil && config.SimilarityThreshold != nil {
+ return *config.SimilarityThreshold
+ }
}
// Fall back to global cache threshold or bert threshold
return c.GetCacheSimilarityThreshold()
}
-// IsJailbreakEnabledForCategory returns whether jailbreak detection is enabled for a specific category
-// If the category has an explicit setting, it takes precedence; otherwise, uses global setting
-func (c *RouterConfig) IsJailbreakEnabledForCategory(categoryName string) bool {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.JailbreakEnabled != nil {
- return *category.JailbreakEnabled
+// IsJailbreakEnabledForDecision returns whether jailbreak detection is enabled for a specific decision
+func (c *RouterConfig) IsJailbreakEnabledForDecision(decisionName string) bool {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetJailbreakConfig()
+ if config != nil {
+ return config.Enabled
+ }
}
// Fall back to global setting
return c.PromptGuard.Enabled
}
-// GetJailbreakThresholdForCategory returns the effective jailbreak detection threshold for a category
-// Priority: category-specific > global prompt_guard threshold
-func (c *RouterConfig) GetJailbreakThresholdForCategory(categoryName string) float32 {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.JailbreakThreshold != nil {
- return *category.JailbreakThreshold
+// GetJailbreakThresholdForDecision returns the effective jailbreak detection threshold for a decision
+func (c *RouterConfig) GetJailbreakThresholdForDecision(decisionName string) float32 {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetJailbreakConfig()
+ if config != nil && config.Threshold != nil {
+ return *config.Threshold
+ }
}
// Fall back to global threshold
return c.PromptGuard.Threshold
}
-// IsPIIEnabledForCategory returns whether PII detection is enabled for a specific category
-// If the category has an explicit setting, it takes precedence; otherwise, uses global setting
-func (c *RouterConfig) IsPIIEnabledForCategory(categoryName string) bool {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.PIIEnabled != nil {
- return *category.PIIEnabled
+// IsPIIEnabledForDecision returns whether PII detection is enabled for a specific decision
+func (c *RouterConfig) IsPIIEnabledForDecision(decisionName string) bool {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetPIIConfig()
+ if config != nil {
+ return config.Enabled
+ }
}
// Fall back to global setting
return c.IsPIIClassifierEnabled()
}
-// GetPIIThresholdForCategory returns the effective PII detection threshold for a category
-// Priority: category-specific > global classifier.pii_model threshold
-func (c *RouterConfig) GetPIIThresholdForCategory(categoryName string) float32 {
- category := c.GetCategoryByName(categoryName)
- if category != nil && category.PIIThreshold != nil {
- return *category.PIIThreshold
+// GetPIIThresholdForDecision returns the effective PII detection threshold for a decision
+func (c *RouterConfig) GetPIIThresholdForDecision(decisionName string) float32 {
+ decision := c.GetDecisionByName(decisionName)
+ if decision != nil {
+ config := decision.GetPIIConfig()
+ if config != nil && config.Threshold != nil {
+ return *config.Threshold
+ }
}
// Fall back to global threshold
return c.PIIModel.Threshold
diff --git a/src/semantic-router/pkg/config/loader.go b/src/semantic-router/pkg/config/loader.go
index 616ec0341..4e80bb5f9 100644
--- a/src/semantic-router/pkg/config/loader.go
+++ b/src/semantic-router/pkg/config/loader.go
@@ -14,6 +14,10 @@ var (
configOnce sync.Once
configErr error
configMu sync.RWMutex
+
+ // Config change notification channel
+ configUpdateCh chan *RouterConfig
+ configUpdateMu sync.Mutex
)
// Load loads the configuration from the specified YAML file once and caches it globally.
@@ -64,10 +68,20 @@ func Parse(configPath string) (*RouterConfig, error) {
// Replace replaces the globally cached config. It is safe for concurrent readers.
func Replace(newCfg *RouterConfig) {
configMu.Lock()
- defer configMu.Unlock()
config = newCfg
- // Do not reset configOnce to avoid racing re-parses via LoadConfig; callers should use ParseConfigFile for fresher reads.
configErr = nil
+ configMu.Unlock()
+
+ // Notify listeners of config change
+ configUpdateMu.Lock()
+ if configUpdateCh != nil {
+ select {
+ case configUpdateCh <- newCfg:
+ default:
+ // Channel full or no listener, skip
+ }
+ }
+ configUpdateMu.Unlock()
}
// Get returns the current configuration
@@ -76,3 +90,15 @@ func Get() *RouterConfig {
defer configMu.RUnlock()
return config
}
+
+// WatchConfigUpdates returns a channel that receives config updates
+// Only one watcher is supported at a time
+func WatchConfigUpdates() <-chan *RouterConfig {
+ configUpdateMu.Lock()
+ defer configUpdateMu.Unlock()
+
+ if configUpdateCh == nil {
+ configUpdateCh = make(chan *RouterConfig, 1)
+ }
+ return configUpdateCh
+}
diff --git a/src/semantic-router/pkg/config/validator.go b/src/semantic-router/pkg/config/validator.go
index 0f855f676..14124da09 100644
--- a/src/semantic-router/pkg/config/validator.go
+++ b/src/semantic-router/pkg/config/validator.go
@@ -85,28 +85,32 @@ func getIPAddressType(address string) string {
// validateConfigStructure performs additional validation on the parsed config
func validateConfigStructure(cfg *RouterConfig) error {
- // Ensure all categories have at least one model with scores
- for _, category := range cfg.Categories {
- if len(category.ModelScores) == 0 {
- return fmt.Errorf("category '%s' has no model_scores defined - each category must have at least one model", category.Name)
+ // In Kubernetes mode, decisions and model_config will be loaded from CRDs
+ // Skip validation for these fields during initial config parse
+ if cfg.ConfigSource == ConfigSourceKubernetes {
+ // Skip validation for decisions and model_config
+ return nil
+ }
+
+ // File mode: validate decisions have at least one model ref
+ for _, decision := range cfg.Decisions {
+ if len(decision.ModelRefs) == 0 {
+ return fmt.Errorf("decision '%s' has no modelRefs defined - each decision must have at least one model", decision.Name)
}
- // Validate each model score has the required fields
- for i, modelScore := range category.ModelScores {
- if modelScore.Model == "" {
- return fmt.Errorf("category '%s', model_scores[%d]: model name cannot be empty", category.Name, i)
- }
- if modelScore.Score <= 0 {
- return fmt.Errorf("category '%s', model '%s': score must be greater than 0, got %f", category.Name, modelScore.Model, modelScore.Score)
+ // Validate each model ref has the required fields
+ for i, modelRef := range decision.ModelRefs {
+ if modelRef.Model == "" {
+ return fmt.Errorf("decision '%s', modelRefs[%d]: model name cannot be empty", decision.Name, i)
}
- if modelScore.UseReasoning == nil {
- return fmt.Errorf("category '%s', model '%s': missing required field 'use_reasoning'", category.Name, modelScore.Model)
+ if modelRef.UseReasoning == nil {
+ return fmt.Errorf("decision '%s', model '%s': missing required field 'use_reasoning'", decision.Name, modelRef.Model)
}
// Validate LoRA name if specified
- if modelScore.LoRAName != "" {
- if err := validateLoRAName(cfg, modelScore.Model, modelScore.LoRAName); err != nil {
- return fmt.Errorf("category '%s', model '%s': %w", category.Name, modelScore.Model, err)
+ if modelRef.LoRAName != "" {
+ if err := validateLoRAName(cfg, modelRef.Model, modelRef.LoRAName); err != nil {
+ return fmt.Errorf("decision '%s', model '%s': %w", decision.Name, modelRef.Model, err)
}
}
}
diff --git a/src/semantic-router/pkg/decision/engine.go b/src/semantic-router/pkg/decision/engine.go
new file mode 100644
index 000000000..e5418caef
--- /dev/null
+++ b/src/semantic-router/pkg/decision/engine.go
@@ -0,0 +1,204 @@
+/*
+Copyright 2025 vLLM Semantic Router.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package decision
+
+import (
+ "fmt"
+ "sort"
+
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+)
+
+// DecisionEngine evaluates routing decisions based on rule combinations
+type DecisionEngine struct {
+ keywordRules []config.KeywordRule
+ embeddingRules []config.EmbeddingRule
+ categories []config.Category
+ decisions []config.Decision
+ strategy string
+}
+
+// NewDecisionEngine creates a new decision engine
+func NewDecisionEngine(
+ keywordRules []config.KeywordRule,
+ embeddingRules []config.EmbeddingRule,
+ categories []config.Category,
+ decisions []config.Decision,
+ strategy string,
+) *DecisionEngine {
+ if strategy == "" {
+ strategy = "priority" // default strategy
+ }
+ return &DecisionEngine{
+ keywordRules: keywordRules,
+ embeddingRules: embeddingRules,
+ categories: categories,
+ decisions: decisions,
+ strategy: strategy,
+ }
+}
+
+// DecisionResult represents the result of decision evaluation
+type DecisionResult struct {
+ Decision *config.Decision
+ Confidence float64
+ MatchedRules []string
+}
+
+// EvaluateDecisions evaluates all decisions and returns the best match based on strategy
+// matchedKeywordRules: list of matched keyword rule names
+// matchedEmbeddingRules: list of matched embedding rule names
+// matchedDomainRules: list of matched domain rule names (category names)
+func (e *DecisionEngine) EvaluateDecisions(
+ matchedKeywordRules []string,
+ matchedEmbeddingRules []string,
+ matchedDomainRules []string,
+) (*DecisionResult, error) {
+ if len(e.decisions) == 0 {
+ return nil, fmt.Errorf("no decisions configured")
+ }
+
+ var results []DecisionResult
+
+ // Evaluate each decision
+ for i := range e.decisions {
+ decision := &e.decisions[i]
+ matched, confidence, matchedRules := e.evaluateDecision(
+ decision,
+ matchedKeywordRules,
+ matchedEmbeddingRules,
+ matchedDomainRules,
+ )
+
+ if matched {
+ results = append(results, DecisionResult{
+ Decision: decision,
+ Confidence: confidence,
+ MatchedRules: matchedRules,
+ })
+ }
+ }
+
+ if len(results) == 0 {
+ return nil, fmt.Errorf("no decision matched")
+ }
+
+ // Select best decision based on strategy
+ return e.selectBestDecision(results), nil
+}
+
+// evaluateDecision evaluates a single decision's rule combination
+func (e *DecisionEngine) evaluateDecision(
+ decision *config.Decision,
+ matchedKeywordRules []string,
+ matchedEmbeddingRules []string,
+ matchedDomainRules []string,
+) (matched bool, confidence float64, matchedRules []string) {
+ return e.evaluateRuleCombination(
+ decision.Rules,
+ matchedKeywordRules,
+ matchedEmbeddingRules,
+ matchedDomainRules,
+ )
+}
+
+// evaluateRuleCombination evaluates a rule combination with AND/OR logic
+func (e *DecisionEngine) evaluateRuleCombination(
+ rules config.RuleCombination,
+ matchedKeywordRules []string,
+ matchedEmbeddingRules []string,
+ matchedDomainRules []string,
+) (matched bool, confidence float64, matchedRules []string) {
+ if len(rules.Conditions) == 0 {
+ return false, 0, nil
+ }
+
+ matchedCount := 0
+ totalConfidence := 0.0
+ var allMatchedRules []string
+
+ for _, condition := range rules.Conditions {
+ conditionMatched := false
+ var matchedList []string
+
+ switch condition.Type {
+ case "keyword":
+ matchedList = matchedKeywordRules
+ case "embedding":
+ matchedList = matchedEmbeddingRules
+ case "domain":
+ matchedList = matchedDomainRules
+ default:
+ continue
+ }
+
+ // Check if the condition's rule name is in the matched list
+ for _, ruleName := range matchedList {
+ if ruleName == condition.Name {
+ conditionMatched = true
+ allMatchedRules = append(allMatchedRules, fmt.Sprintf("%s:%s", condition.Type, condition.Name))
+ break
+ }
+ }
+
+ if conditionMatched {
+ matchedCount++
+ totalConfidence += 1.0 // Each matched condition contributes 1.0 to confidence
+ }
+ }
+
+ // Calculate final match result based on operator
+ if rules.Operator == "AND" {
+ matched = matchedCount == len(rules.Conditions)
+ } else { // OR
+ matched = matchedCount > 0
+ }
+
+ // Calculate confidence as ratio of matched conditions
+ if len(rules.Conditions) > 0 {
+ confidence = totalConfidence / float64(len(rules.Conditions))
+ }
+
+ return matched, confidence, allMatchedRules
+}
+
+// selectBestDecision selects the best decision based on the configured strategy
+func (e *DecisionEngine) selectBestDecision(results []DecisionResult) *DecisionResult {
+ if len(results) == 0 {
+ return nil
+ }
+
+ if len(results) == 1 {
+ return &results[0]
+ }
+
+ // Sort based on strategy
+ if e.strategy == "confidence" {
+ // Sort by confidence (descending)
+ sort.Slice(results, func(i, j int) bool {
+ return results[i].Confidence > results[j].Confidence
+ })
+ } else {
+ // Default: priority strategy
+ // Sort by priority (descending)
+ sort.Slice(results, func(i, j int) bool {
+ return results[i].Decision.Priority > results[j].Decision.Priority
+ })
+ }
+
+ return &results[0]
+}
diff --git a/src/semantic-router/pkg/decision/engine_test.go b/src/semantic-router/pkg/decision/engine_test.go
new file mode 100644
index 000000000..34350ce50
--- /dev/null
+++ b/src/semantic-router/pkg/decision/engine_test.go
@@ -0,0 +1,160 @@
+package decision
+
+import (
+ "testing"
+
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+)
+
+func TestDecisionEngine_EvaluateDecisions(t *testing.T) {
+ tests := []struct {
+ name string
+ decisions []config.Decision
+ strategy string
+ matchedKeywordRules []string
+ matchedEmbeddingRules []string
+ matchedDomainRules []string
+ expectedDecision string
+ expectError bool
+ }{
+ {
+ name: "Single decision with AND operator - all rules match",
+ decisions: []config.Decision{
+ {
+ Name: "coding-task",
+ Priority: 10,
+ Rules: config.RuleCombination{
+ Operator: "AND",
+ Conditions: []config.RuleCondition{
+ {Type: "keyword", Name: "programming"},
+ {Type: "domain", Name: "coding"},
+ },
+ },
+ ModelRefs: []config.ModelRef{
+ {Model: "codellama"},
+ },
+ },
+ },
+ strategy: "priority",
+ matchedKeywordRules: []string{"programming"},
+ matchedEmbeddingRules: []string{},
+ matchedDomainRules: []string{"coding"},
+ expectedDecision: "coding-task",
+ expectError: false,
+ },
+ {
+ name: "Single decision with AND operator - partial match",
+ decisions: []config.Decision{
+ {
+ Name: "coding-task",
+ Priority: 10,
+ Rules: config.RuleCombination{
+ Operator: "AND",
+ Conditions: []config.RuleCondition{
+ {Type: "keyword", Name: "programming"},
+ {Type: "domain", Name: "coding"},
+ },
+ },
+ },
+ },
+ strategy: "priority",
+ matchedKeywordRules: []string{"programming"},
+ matchedEmbeddingRules: []string{},
+ matchedDomainRules: []string{}, // Missing domain rule
+ expectedDecision: "",
+ expectError: true, // No decision matched
+ },
+ {
+ name: "Single decision with OR operator - partial match",
+ decisions: []config.Decision{
+ {
+ Name: "coding-task",
+ Priority: 10,
+ Rules: config.RuleCombination{
+ Operator: "OR",
+ Conditions: []config.RuleCondition{
+ {Type: "keyword", Name: "programming"},
+ {Type: "domain", Name: "coding"},
+ },
+ },
+ },
+ },
+ strategy: "priority",
+ matchedKeywordRules: []string{"programming"},
+ matchedEmbeddingRules: []string{},
+ matchedDomainRules: []string{}, // Missing domain rule, but OR should still match
+ expectedDecision: "coding-task",
+ expectError: false,
+ },
+ {
+ name: "Multiple decisions - priority strategy",
+ decisions: []config.Decision{
+ {
+ Name: "high-priority-task",
+ Priority: 20,
+ Rules: config.RuleCombination{
+ Operator: "OR",
+ Conditions: []config.RuleCondition{
+ {Type: "keyword", Name: "urgent"},
+ },
+ },
+ },
+ {
+ Name: "low-priority-task",
+ Priority: 10,
+ Rules: config.RuleCombination{
+ Operator: "OR",
+ Conditions: []config.RuleCondition{
+ {Type: "keyword", Name: "urgent"},
+ },
+ },
+ },
+ },
+ strategy: "priority",
+ matchedKeywordRules: []string{"urgent"},
+ matchedEmbeddingRules: []string{},
+ matchedDomainRules: []string{},
+ expectedDecision: "high-priority-task", // Higher priority wins
+ expectError: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ engine := NewDecisionEngine(
+ []config.KeywordRule{},
+ []config.EmbeddingRule{},
+ []config.Category{},
+ tt.decisions,
+ tt.strategy,
+ )
+
+ result, err := engine.EvaluateDecisions(
+ tt.matchedKeywordRules,
+ tt.matchedEmbeddingRules,
+ tt.matchedDomainRules,
+ )
+
+ if tt.expectError {
+ if err == nil {
+ t.Errorf("Expected error but got none")
+ }
+ return
+ }
+
+ if err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ return
+ }
+
+ if result == nil {
+ t.Errorf("Expected result but got nil")
+ return
+ }
+
+ if result.Decision.Name != tt.expectedDecision {
+ t.Errorf("Expected decision %s, got %s", tt.expectedDecision, result.Decision.Name)
+ }
+ })
+ }
+}
diff --git a/src/semantic-router/pkg/extproc/extproc_test.go b/src/semantic-router/pkg/extproc/extproc_test.go
index bbe56d13c..fc320e746 100644
--- a/src/semantic-router/pkg/extproc/extproc_test.go
+++ b/src/semantic-router/pkg/extproc/extproc_test.go
@@ -18,7 +18,6 @@ import (
"github.com/openai/openai-go"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
- "github.com/samber/lo"
"github.com/stretchr/testify/assert"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
@@ -491,15 +490,9 @@ func CreateTestConfig() *config.RouterConfig {
DefaultModel: "model-b",
ModelConfig: map[string]config.ModelParams{
"model-a": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: true,
- },
PreferredEndpoints: []string{"test-endpoint1"},
},
"model-b": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: true,
- },
PreferredEndpoints: []string{"test-endpoint1", "test-endpoint2"},
},
},
@@ -525,10 +518,6 @@ func CreateTestConfig() *config.RouterConfig {
Name: "coding",
Description: "Programming tasks",
},
- ModelScores: []config.ModelScore{
- {Model: "model-a", Score: 0.9},
- {Model: "model-b", Score: 0.8},
- },
},
},
},
@@ -599,7 +588,7 @@ func CreateTestRouter(cfg *config.RouterConfig) (*OpenAIRouter, error) {
}
// Create PII checker
- piiChecker := pii.NewPolicyChecker(cfg, cfg.ModelConfig)
+ piiChecker := pii.NewPolicyChecker(cfg)
// Create router manually with proper initialization
router := &OpenAIRouter{
@@ -633,56 +622,6 @@ var _ = Describe("Security Checks", func() {
Expect(err).NotTo(HaveOccurred())
})
- Context("with PII detection enabled", func() {
- BeforeEach(func() {
- cfg.PIIModel.ModelID = testPIIModelID
- cfg.PIIMappingPath = testPIIMappingPath
-
- // Create a restrictive PII policy
- cfg.ModelConfig["model-a"] = config.ModelParams{
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false,
- PIITypes: []string{"NO_PII"},
- },
- }
- router.PIIChecker = pii.NewPolicyChecker(cfg, cfg.ModelConfig)
- var err error
- router.Classifier, err = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, nil)
- Expect(err).NotTo(HaveOccurred())
- })
-
- It("should allow requests with no PII", func() {
- request := cache.OpenAIRequest{
- Model: "model-a",
- Messages: []cache.ChatMessage{
- {Role: "user", Content: "What is the weather like today?"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "pii-test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- // Should either continue or return PII violation, but not error
- Expect(response.GetRequestBody()).NotTo(BeNil())
- })
- })
-
Context("with PII token classification", func() {
BeforeEach(func() {
cfg.PIIModel.ModelID = testPIIModelID
@@ -1030,47 +969,6 @@ var _ = Describe("Security Checks", func() {
})
Describe("Integration with request processing", func() {
- It("should properly integrate PII detection in request processing", func() {
- // Create a request with PII content
- request := cache.OpenAIRequest{
- Model: "model-a",
- Messages: []cache.ChatMessage{
- {Role: "user", Content: "My email is sensitive@example.com, please help me"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "pii-integration-test",
- StartTime: time.Now(),
- }
-
- // Configure restrictive PII policy
- cfg.ModelConfig["model-a"] = config.ModelParams{
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false,
- PIITypes: []string{"NO_PII"},
- },
- }
- router.PIIChecker = pii.NewPolicyChecker(cfg, cfg.ModelConfig)
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- // The response should handle PII appropriately (either block or allow based on policy)
- Expect(response.GetRequestBody()).NotTo(BeNil())
- })
-
It("should handle PII detection when classifier is disabled", func() {
// Temporarily disable PII classification
originalMapping := router.Classifier.PIIMapping
@@ -1164,570 +1062,6 @@ var _ = Describe("Security Checks", func() {
})
})
-var _ = Describe("Request Processing", func() {
- var (
- router *OpenAIRouter
- cfg *config.RouterConfig
- )
-
- BeforeEach(func() {
- cfg = CreateTestConfig()
- var err error
- router, err = CreateTestRouter(cfg)
- Expect(err).NotTo(HaveOccurred())
- })
-
- Describe("handleRequestHeaders", func() {
- It("should process request headers successfully", func() {
- headers := &ext_proc.ProcessingRequest_RequestHeaders{
- RequestHeaders: &ext_proc.HttpHeaders{
- Headers: &core.HeaderMap{
- Headers: []*core.HeaderValue{
- {Key: "content-type", Value: "application/json"},
- {Key: "x-request-id", Value: "test-request-123"},
- {Key: "authorization", Value: "Bearer token"},
- },
- },
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- }
-
- response, err := router.HandleRequestHeaders(headers, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- // Check that headers were stored
- Expect(ctx.Headers).To(HaveKeyWithValue("content-type", "application/json"))
- Expect(ctx.Headers).To(HaveKeyWithValue("x-request-id", "test-request-123"))
- Expect(ctx.RequestID).To(Equal("test-request-123"))
-
- // Check response status
- headerResp := response.GetRequestHeaders()
- Expect(headerResp).NotTo(BeNil())
- Expect(headerResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle missing x-request-id header", func() {
- headers := &ext_proc.ProcessingRequest_RequestHeaders{
- RequestHeaders: &ext_proc.HttpHeaders{
- Headers: &core.HeaderMap{
- Headers: []*core.HeaderValue{
- {Key: "content-type", Value: "application/json"},
- },
- },
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- }
-
- response, err := router.HandleRequestHeaders(headers, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(ctx.RequestID).To(BeEmpty())
- Expect(response.GetRequestHeaders().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle case-insensitive header matching", func() {
- headers := &ext_proc.ProcessingRequest_RequestHeaders{
- RequestHeaders: &ext_proc.HttpHeaders{
- Headers: &core.HeaderMap{
- Headers: []*core.HeaderValue{
- {Key: "X-Request-ID", Value: "test-case-insensitive"},
- },
- },
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- }
-
- _, err := router.HandleRequestHeaders(headers, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(ctx.RequestID).To(Equal("test-case-insensitive"))
- })
- })
-
- Describe("handleRequestBody", func() {
- Context("with valid OpenAI request", func() {
- It("should process auto model routing successfully", func() {
- request := cache.OpenAIRequest{
- Model: "auto",
- Messages: []cache.ChatMessage{
- {Role: "user", Content: "Write a Python function to sort a list"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- // Should continue processing
- bodyResp := response.GetRequestBody()
- Expect(bodyResp).NotTo(BeNil())
- Expect(bodyResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle non-auto model without modification", func() {
- request := cache.OpenAIRequest{
- Model: "model-a",
- Messages: []cache.ChatMessage{
- {Role: "user", Content: "Hello world"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
-
- bodyResp := response.GetRequestBody()
- Expect(bodyResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle empty user content", func() {
- request := cache.OpenAIRequest{
- Model: "auto",
- Messages: []cache.ChatMessage{
- {Role: "system", Content: "You are a helpful assistant"},
- {Role: "assistant", Content: "Hello! How can I help you?"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response.GetRequestBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
- })
-
- Context("with invalid request body", func() {
- It("should return error for malformed JSON", func() {
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: []byte(`{"model": "model-a", "messages": [invalid json}`),
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).To(HaveOccurred())
- Expect(response).To(BeNil())
- Expect(err.Error()).To(ContainSubstring("invalid request body"))
- })
-
- It("should handle empty request body", func() {
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: []byte{},
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).To(HaveOccurred())
- Expect(response).To(BeNil())
- })
-
- It("should handle nil request body", func() {
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: nil,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).To(HaveOccurred())
- Expect(response).To(BeNil())
- })
- })
-
- Context("with tools auto-selection", func() {
- BeforeEach(func() {
- cfg.Tools.Enabled = true
- router.ToolsDatabase = tools.NewToolsDatabase(tools.ToolsDatabaseOptions{
- Enabled: true,
- })
- })
-
- It("should handle tools auto-selection", func() {
- request := map[string]interface{}{
- "model": "model-a",
- "messages": []map[string]interface{}{
- {"role": "user", "content": "Calculate the square root of 16"},
- },
- "tools": "auto",
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
-
- // Should process successfully even if tools selection fails
- bodyResp := response.GetRequestBody()
- Expect(bodyResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should fallback to empty tools on error", func() {
- cfg.Tools.FallbackToEmpty = true
-
- request := map[string]interface{}{
- "model": "model-a",
- "messages": []map[string]interface{}{
- {"role": "user", "content": "Test query"},
- },
- "tools": "auto",
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response.GetRequestBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
- })
- })
-
- Describe("handleResponseHeaders", func() {
- It("should process response headers successfully", func() {
- responseHeaders := &ext_proc.ProcessingRequest_ResponseHeaders{
- ResponseHeaders: &ext_proc.HttpHeaders{
- Headers: &core.HeaderMap{
- Headers: []*core.HeaderValue{
- {Key: "content-type", Value: "application/json"},
- {Key: "x-response-id", Value: "resp-123"},
- },
- },
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestModel: "model-a",
- ProcessingStartTime: time.Now().Add(-50 * time.Millisecond),
- }
-
- response, err := router.HandleResponseHeaders(responseHeaders, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- respHeaders := response.GetResponseHeaders()
- Expect(respHeaders).NotTo(BeNil())
- Expect(respHeaders.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
- })
-
- Describe("handleResponseBody", func() {
- It("should process response body with token parsing", func() {
- openAIResponse := openai.ChatCompletion{
- ID: "chatcmpl-123",
- Object: "chat.completion",
- Created: time.Now().Unix(),
- Model: "model-a",
- Usage: openai.CompletionUsage{
- PromptTokens: 150,
- CompletionTokens: 50,
- TotalTokens: 200,
- },
- Choices: []openai.ChatCompletionChoice{
- {
- Message: openai.ChatCompletionMessage{
- Role: "assistant",
- Content: "This is a test response",
- },
- FinishReason: "stop",
- },
- },
- }
-
- responseBody, err := json.Marshal(openAIResponse)
- Expect(err).NotTo(HaveOccurred())
-
- bodyResponse := &ext_proc.ProcessingRequest_ResponseBody{
- ResponseBody: &ext_proc.HttpBody{
- Body: responseBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- RequestModel: "model-a",
- RequestQuery: "test query",
- StartTime: time.Now().Add(-2 * time.Second),
- }
-
- response, err := router.HandleResponseBody(bodyResponse, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response).NotTo(BeNil())
-
- respBody := response.GetResponseBody()
- Expect(respBody).NotTo(BeNil())
- Expect(respBody.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle invalid response JSON gracefully", func() {
- bodyResponse := &ext_proc.ProcessingRequest_ResponseBody{
- ResponseBody: &ext_proc.HttpBody{
- Body: []byte(`{invalid json}`),
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- RequestModel: "model-a",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleResponseBody(bodyResponse, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response.GetResponseBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- It("should handle empty response body", func() {
- bodyResponse := &ext_proc.ProcessingRequest_ResponseBody{
- ResponseBody: &ext_proc.HttpBody{
- Body: nil,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleResponseBody(bodyResponse, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response.GetResponseBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
-
- Context("with category-specific system prompt", func() {
- BeforeEach(func() {
- // Add a category with system prompt to the config
- cfg.Categories = append(cfg.Categories, config.Category{
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- Description: "Mathematical queries and calculations",
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SystemPromptPolicy: config.SystemPromptPolicy{
- SystemPrompt: "You are a helpful assistant specialized in mathematics. Please provide step-by-step solutions.",
- },
- },
- ModelScores: []config.ModelScore{
- {
- Model: "model-a",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- })
-
- // Recreate router with updated config
- var err error
- router, err = CreateTestRouter(cfg)
- Expect(err).NotTo(HaveOccurred())
- })
-
- It("should add category-specific system prompt to auto model requests", func() {
- request := cache.OpenAIRequest{
- Model: "auto",
- Messages: []cache.ChatMessage{
- {Role: "user", Content: "What is the derivative of x^2 + 3x + 1?"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "system-prompt-test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
-
- bodyResp := response.GetRequestBody()
- Expect(bodyResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
-
- // Check if the request body was modified with system prompt
- if bodyResp.Response.BodyMutation != nil {
- modifiedBody := bodyResp.Response.BodyMutation.GetBody()
- Expect(modifiedBody).NotTo(BeNil())
-
- var modifiedRequest map[string]interface{}
- err = json.Unmarshal(modifiedBody, &modifiedRequest)
- Expect(err).NotTo(HaveOccurred())
-
- messages, ok := modifiedRequest["messages"].([]interface{})
- Expect(ok).To(BeTrue())
- Expect(len(messages)).To(BeNumerically(">=", 2))
-
- // Check that system message was added
- firstMessage, ok := messages[0].(map[string]interface{})
- Expect(ok).To(BeTrue())
- Expect(firstMessage["role"]).To(Equal("system"))
- Expect(firstMessage["content"]).To(ContainSubstring("mathematics"))
- Expect(firstMessage["content"]).To(ContainSubstring("step-by-step"))
- }
- })
-
- It("should replace existing system prompt with category-specific one", func() {
- request := cache.OpenAIRequest{
- Model: "auto",
- Messages: []cache.ChatMessage{
- {Role: "system", Content: "You are a general assistant."},
- {Role: "user", Content: "Solve the equation 2x + 5 = 15"},
- },
- }
-
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "system-prompt-replace-test-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
-
- bodyResp := response.GetRequestBody()
- Expect(bodyResp.Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
-
- // Check if the request body was modified with system prompt
- if bodyResp.Response.BodyMutation != nil {
- modifiedBody := bodyResp.Response.BodyMutation.GetBody()
- Expect(modifiedBody).NotTo(BeNil())
-
- var modifiedRequest map[string]interface{}
- err = json.Unmarshal(modifiedBody, &modifiedRequest)
- Expect(err).NotTo(HaveOccurred())
-
- messages, ok := modifiedRequest["messages"].([]interface{})
- Expect(ok).To(BeTrue())
- Expect(len(messages)).To(Equal(2))
-
- // Check that system message was replaced
- firstMessage, ok := messages[0].(map[string]interface{})
- Expect(ok).To(BeTrue())
- Expect(firstMessage["role"]).To(Equal("system"))
- Expect(firstMessage["content"]).To(ContainSubstring("mathematics"))
- Expect(firstMessage["content"]).NotTo(ContainSubstring("general assistant"))
- }
- })
- })
- })
-})
-
func TestExtProc(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "ExtProc Suite")
@@ -2905,111 +2239,24 @@ var _ = Describe("Caching Functionality", func() {
},
}
- requestBody, err := json.Marshal(request)
- Expect(err).NotTo(HaveOccurred())
-
- bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
- RequestBody: &ext_proc.HttpBody{
- Body: requestBody,
- },
- }
-
- ctx := &RequestContext{
- Headers: make(map[string]string),
- RequestID: "no-cache-request",
- StartTime: time.Now(),
- }
-
- response, err := router.HandleRequestBody(bodyRequest, ctx)
- Expect(err).NotTo(HaveOccurred())
- Expect(response.GetRequestBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
- })
- })
-
- Describe("Category-Specific Caching", func() {
- It("should use category-specific cache settings", func() {
- // Create a config with category-specific cache settings
- cfg := CreateTestConfig()
- cfg.Enabled = true
- cfg.SimilarityThreshold = lo.ToPtr(float32(0.8))
-
- // Add categories with different cache settings
- cfg.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "health",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "model-a",
- Score: 1.0,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SemanticCachingPolicy: config.SemanticCachingPolicy{
- SemanticCacheEnabled: lo.ToPtr(true),
- SemanticCacheSimilarityThreshold: lo.ToPtr(float32(0.95)),
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "general",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "model-a",
- Score: 1.0,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- DomainAwarePolicies: config.DomainAwarePolicies{
- SemanticCachingPolicy: config.SemanticCachingPolicy{
- SemanticCacheEnabled: lo.ToPtr(false),
- SemanticCacheSimilarityThreshold: lo.ToPtr(float32(0.7)),
- },
- },
- },
- }
-
- // Verify category cache settings are correct
- Expect(cfg.IsCacheEnabledForCategory("health")).To(BeTrue())
- Expect(cfg.IsCacheEnabledForCategory("general")).To(BeFalse())
- Expect(cfg.GetCacheSimilarityThresholdForCategory("health")).To(Equal(float32(0.95)))
- Expect(cfg.GetCacheSimilarityThresholdForCategory("general")).To(Equal(float32(0.7)))
- })
-
- It("should fall back to global settings when category doesn't specify", func() {
- cfg := CreateTestConfig()
- cfg.Enabled = true
- cfg.SimilarityThreshold = lo.ToPtr(float32(0.8))
-
- // Add category without cache settings
- cfg.Categories = []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "test",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "model-a",
- Score: 1.0,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
+ requestBody, err := json.Marshal(request)
+ Expect(err).NotTo(HaveOccurred())
+
+ bodyRequest := &ext_proc.ProcessingRequest_RequestBody{
+ RequestBody: &ext_proc.HttpBody{
+ Body: requestBody,
},
}
- // Should use global settings
- Expect(cfg.IsCacheEnabledForCategory("test")).To(BeTrue())
- Expect(cfg.GetCacheSimilarityThresholdForCategory("test")).To(Equal(float32(0.8)))
+ ctx := &RequestContext{
+ Headers: make(map[string]string),
+ RequestID: "no-cache-request",
+ StartTime: time.Now(),
+ }
+
+ response, err := router.HandleRequestBody(bodyRequest, ctx)
+ Expect(err).NotTo(HaveOccurred())
+ Expect(response.GetRequestBody().Response.Status).To(Equal(ext_proc.CommonResponse_CONTINUE))
})
})
})
@@ -3288,351 +2535,6 @@ func TestVSRInjectedSystemPromptHeader(t *testing.T) {
})
}
-// TestReasoningModeIntegration tests the complete reasoning mode integration
-func TestReasoningModeIntegration(t *testing.T) {
- // Create a mock router with reasoning configuration
- cfg := &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- ReasoningConfig: config.ReasoningConfig{
- DefaultReasoningEffort: "medium",
- ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
- "deepseek": {
- Type: "chat_template_kwargs",
- Parameter: "thinking",
- },
- "qwen3": {
- Type: "chat_template_kwargs",
- Parameter: "enable_thinking",
- },
- "gpt-oss": {
- Type: "reasoning_effort",
- Parameter: "reasoning_effort",
- },
- },
- },
- Categories: []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- ReasoningDescription: "Mathematical problems require step-by-step reasoning",
- ReasoningEffort: "high",
- },
- },
- {
- Model: "phi4",
- Score: 0.7,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "business",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- ReasoningDescription: "Business content is typically conversational",
- },
- },
- {
- Model: "deepseek-v31",
- Score: 0.6,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- },
- },
- },
- },
- },
- },
- BackendModels: config.BackendModels{
- ModelConfig: map[string]config.ModelParams{
- "deepseek-v31": {
- ReasoningFamily: "deepseek",
- },
- "qwen3-model": {
- ReasoningFamily: "qwen3",
- },
- "gpt-oss-model": {
- ReasoningFamily: "gpt-oss",
- },
- "phi4": {
- // No reasoning family - doesn't support reasoning
- },
- },
- },
- }
-
- router := &OpenAIRouter{
- Config: cfg,
- }
- // Test case 3: Test addReasoningModeToRequestBody function
- t.Run("addReasoningModeToRequestBody adds correct fields", func(t *testing.T) {
- // Test with DeepSeek model (which supports chat_template_kwargs)
- originalRequest := map[string]interface{}{
- "model": "deepseek-v31",
- "messages": []map[string]interface{}{
- {"role": "user", "content": "What is 2 + 2?"},
- },
- "stream": false,
- }
-
- originalBody, err := json.Marshal(originalRequest)
- if err != nil {
- t.Fatalf("Failed to marshal original request: %v", err)
- }
-
- modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math")
- if err != nil {
- t.Fatalf("Failed to add reasoning mode: %v", err)
- }
-
- var modifiedRequest map[string]interface{}
- if unmarshalErr := json.Unmarshal(modifiedBody, &modifiedRequest); unmarshalErr != nil {
- t.Fatalf("Failed to unmarshal modified request: %v", unmarshalErr)
- }
-
- // Check if chat_template_kwargs was added for DeepSeek model
- chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]
- if !exists {
- t.Error("chat_template_kwargs not found in modified request for DeepSeek model")
- }
-
- // Check if thinking: true was set for DeepSeek model
- if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
- if thinking, hasThinking := kwargs["thinking"]; hasThinking {
- if thinkingBool, isBool := thinking.(bool); !isBool || !thinkingBool {
- t.Errorf("Expected thinking: true for DeepSeek model, got %v", thinking)
- }
- } else {
- t.Error("thinking field not found in chat_template_kwargs for DeepSeek model")
- }
- } else {
- t.Errorf("chat_template_kwargs is not a map for DeepSeek model, got %T", chatTemplateKwargs)
- }
-
- // Verify original fields are preserved
- originalFields := []string{"model", "messages", "stream"}
- for _, field := range originalFields {
- if _, exists := modifiedRequest[field]; !exists {
- t.Errorf("Original field '%s' was lost", field)
- }
- }
-
- // Test with unsupported model (phi4) - should not add chat_template_kwargs
- originalRequestPhi4 := map[string]interface{}{
- "model": "phi4",
- "messages": []map[string]interface{}{
- {"role": "user", "content": "What is 2 + 2?"},
- },
- "stream": false,
- }
-
- originalBodyPhi4, err := json.Marshal(originalRequestPhi4)
- if err != nil {
- t.Fatalf("Failed to marshal phi4 request: %v", err)
- }
-
- modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true, "math")
- if err != nil {
- t.Fatalf("Failed to process phi4 request: %v", err)
- }
-
- var modifiedRequestPhi4 map[string]interface{}
- if err := json.Unmarshal(modifiedBodyPhi4, &modifiedRequestPhi4); err != nil {
- t.Fatalf("Failed to unmarshal phi4 request: %v", err)
- }
-
- // For phi4, no reasoning fields should be added (since it's an unknown model)
- if _, exists := modifiedRequestPhi4["chat_template_kwargs"]; exists {
- t.Error("chat_template_kwargs should not be added for unknown model phi4")
- }
-
- // reasoning_effort should also not be set for unknown models
- if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; exists {
- t.Errorf("reasoning_effort should NOT be set for unknown model phi4, but got %v", reasoningEffort)
- }
- })
-
- // Test case 4: Test buildReasoningRequestFields function with config-driven approach
- t.Run("buildReasoningRequestFields returns correct values", func(t *testing.T) {
- // Create a router with sample configurations for testing
- testRouter := &OpenAIRouter{
- Config: &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- ReasoningConfig: config.ReasoningConfig{
- DefaultReasoningEffort: "medium",
- ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
- "deepseek": {
- Type: "chat_template_kwargs",
- Parameter: "thinking",
- },
- "qwen3": {
- Type: "chat_template_kwargs",
- Parameter: "enable_thinking",
- },
- },
- },
- },
- BackendModels: config.BackendModels{
- ModelConfig: map[string]config.ModelParams{
- "deepseek-v31": {
- ReasoningFamily: "deepseek",
- },
- "qwen3-model": {
- ReasoningFamily: "qwen3",
- },
- "phi4": {
- // No reasoning family - doesn't support reasoning
- },
- },
- },
- },
- }
-
- // Test with DeepSeek model and reasoning enabled
- fields, _ := testRouter.buildReasoningRequestFields("deepseek-v31", true, "test-category")
- if fields == nil {
- t.Error("Expected non-nil fields for DeepSeek model with reasoning enabled")
- }
- if chatKwargs, ok := fields["chat_template_kwargs"]; !ok {
- t.Error("Expected chat_template_kwargs for DeepSeek model")
- } else if kwargs, ok := chatKwargs.(map[string]interface{}); !ok {
- t.Error("Expected chat_template_kwargs to be a map")
- } else if thinking, ok := kwargs["thinking"]; !ok || thinking != true {
- t.Errorf("Expected thinking: true for DeepSeek model, got %v", thinking)
- }
-
- // Test with DeepSeek model and reasoning disabled
- fields, _ = testRouter.buildReasoningRequestFields("deepseek-v31", false, "test-category")
- if fields != nil {
- t.Errorf("Expected nil fields for DeepSeek model with reasoning disabled, got %v", fields)
- }
-
- // Test with Qwen3 model and reasoning enabled
- fields, _ = testRouter.buildReasoningRequestFields("qwen3-model", true, "test-category")
- if fields == nil {
- t.Error("Expected non-nil fields for Qwen3 model with reasoning enabled")
- }
- if chatKwargs, ok := fields["chat_template_kwargs"]; !ok {
- t.Error("Expected chat_template_kwargs for Qwen3 model")
- } else if kwargs, ok := chatKwargs.(map[string]interface{}); !ok {
- t.Error("Expected chat_template_kwargs to be a map")
- } else if enableThinking, ok := kwargs["enable_thinking"]; !ok || enableThinking != true {
- t.Errorf("Expected enable_thinking: true for Qwen3 model, got %v", enableThinking)
- }
-
- // Test with unknown model (should return no fields)
- fields, effort := testRouter.buildReasoningRequestFields("unknown-model", true, "test-category")
- if fields != nil {
- t.Errorf("Expected nil fields for unknown model with reasoning enabled, got %v", fields)
- }
- if effort != "" {
- t.Errorf("Expected effort string: empty for unknown model, got %v", effort)
- }
- })
-}
-
-// TestReasoningModeConfigurationValidation tests the configuration validation
-func TestReasoningModeConfigurationValidation(t *testing.T) {
- testCases := []struct {
- name string
- category config.Category
- expected bool
- }{
- {
- name: "Math category with reasoning enabled",
- category: config.Category{
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- ReasoningDescription: "Mathematical problems require step-by-step reasoning",
- },
- },
- },
- },
- expected: true,
- },
- {
- name: "Business category with reasoning disabled",
- category: config.Category{
- CategoryMetadata: config.CategoryMetadata{
- Name: "business",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- ReasoningDescription: "Business content is typically conversational",
- },
- },
- },
- },
- expected: false,
- },
- {
- name: "Science category with reasoning enabled",
- category: config.Category{
- CategoryMetadata: config.CategoryMetadata{
- Name: "science",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- ReasoningDescription: "Scientific concepts benefit from structured analysis",
- },
- },
- },
- },
- expected: true,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- // Check the best model's reasoning capability
- bestModelReasoning := false
- if len(tc.category.ModelScores) > 0 && tc.category.ModelScores[0].UseReasoning != nil {
- bestModelReasoning = *tc.category.ModelScores[0].UseReasoning
- }
-
- if bestModelReasoning != tc.expected {
- t.Errorf("Expected best model UseReasoning %v for %s, got %v",
- tc.expected, tc.category.Name, bestModelReasoning)
- }
-
- // Verify description is not empty (now in ModelScore)
- if len(tc.category.ModelScores) > 0 && tc.category.ModelScores[0].ReasoningDescription == "" {
- t.Errorf("ReasoningDescription should not be empty for best model in category %s", tc.category.Name)
- }
- })
- }
-}
-
// TestModelReasoningFamily tests the new family-based configuration approach
func TestModelReasoningFamily(t *testing.T) {
// Create a router with sample model configurations
@@ -3986,209 +2888,6 @@ func TestSetReasoningModeToRequestBody(t *testing.T) {
}
}
-// TestReasoningModeConfiguration demonstrates how the reasoning mode works with the new config-based approach
-func TestReasoningModeConfiguration(_ *testing.T) {
- fmt.Println("=== Configuration-Based Reasoning Mode Test ===")
-
- // Create a mock configuration for testing
- cfg := &config.RouterConfig{
- IntelligentRouting: config.IntelligentRouting{
- Categories: []config.Category{
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "math",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- ReasoningDescription: "Mathematical problems require step-by-step reasoning",
- },
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "business",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "phi4",
- Score: 0.8,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(false),
- ReasoningDescription: "Business content is typically conversational",
- },
- },
- },
- },
- {
- CategoryMetadata: config.CategoryMetadata{
- Name: "biology",
- },
- ModelScores: []config.ModelScore{
- {
- Model: "deepseek-v31",
- Score: 0.9,
- ModelReasoningControl: config.ModelReasoningControl{
- UseReasoning: lo.ToPtr(true),
- ReasoningDescription: "Biological processes benefit from structured analysis",
- },
- },
- },
- },
- },
- },
- }
-
- fmt.Printf("Loaded configuration with %d categories\n\n", len(cfg.Categories))
-
- // Display reasoning configuration for each category
- fmt.Println("--- Reasoning Mode Configuration ---")
- for _, category := range cfg.Categories {
- reasoningStatus := "DISABLED"
- bestModel := "no-model"
- reasoningDesc := ""
- if len(category.ModelScores) > 0 {
- bestModel = category.ModelScores[0].Model
- if category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning {
- reasoningStatus = "ENABLED"
- }
- reasoningDesc = category.ModelScores[0].ReasoningDescription
- }
-
- fmt.Printf("Category: %-15s | Model: %-12s | Reasoning: %-8s | %s\n",
- category.Name, bestModel, reasoningStatus, reasoningDesc)
- }
-
- // Test queries with expected categories
- testQueries := []struct {
- query string
- category string
- }{
- {"What is the derivative of x^2 + 3x + 1?", "math"},
- {"Implement a binary search algorithm in Python", "computer science"},
- {"Explain the process of photosynthesis", "biology"},
- {"Write a business plan for a coffee shop", "business"},
- {"Tell me about World War II", "history"},
- {"What are Newton's laws of motion?", "physics"},
- {"How does chemical bonding work?", "chemistry"},
- {"Design a bridge structure", "engineering"},
- }
-
- fmt.Printf("\n--- Test Query Reasoning Decisions ---\n")
- for _, test := range testQueries {
- // Find the category configuration
- var useReasoning bool
- var reasoningDesc string
- var found bool
-
- for _, category := range cfg.Categories {
- if strings.EqualFold(category.Name, test.category) {
- if len(category.ModelScores) > 0 {
- if category.ModelScores[0].UseReasoning != nil {
- useReasoning = *category.ModelScores[0].UseReasoning
- }
- reasoningDesc = category.ModelScores[0].ReasoningDescription
- }
- found = true
- break
- }
- }
-
- if !found {
- fmt.Printf("Query: %s\n", test.query)
- fmt.Printf(" Expected Category: %s (NOT FOUND IN CONFIG)\n", test.category)
- fmt.Printf(" Reasoning: DISABLED (default)\n\n")
- continue
- }
-
- reasoningStatus := "DISABLED"
- if useReasoning {
- reasoningStatus = "ENABLED"
- }
-
- fmt.Printf("Query: %s\n", test.query)
- fmt.Printf(" Category: %s\n", test.category)
- fmt.Printf(" Reasoning: %s - %s\n", reasoningStatus, reasoningDesc)
-
- // // Generate example request body
- // messages := []map[string]string{
- // {"role": "system", "content": "You are an AI assistant"},
- // {"role": "user", "content": test.query},
- // }
-
- // requestBody := buildRequestBody("deepseek-v31", messages, useReasoning, true)
-
- // Show key differences in request
- if useReasoning {
- fmt.Printf(" Request includes: chat_template_kwargs: {thinking: true}\n")
- } else {
- fmt.Printf(" Request: Standard mode (no reasoning)\n")
- }
- fmt.Println()
- }
-
- // Show example configuration section
- fmt.Println("--- Example Config.yaml Section ---")
- fmt.Print(`
-categories:
-- name: math
- model_scores:
- - model: deepseek-v31
- score: 0.9
- use_reasoning: true
- reasoning_description: "Mathematical problems require step-by-step reasoning"
- reasoning_effort: high
- - model: phi4
- score: 0.7
- use_reasoning: false
-
-- name: business
- model_scores:
- - model: phi4
- score: 0.8
- use_reasoning: false
- reasoning_description: "Business content is typically conversational"
-`)
-}
-
-// GetReasoningConfigurationSummary returns a summary of the reasoning configuration
-func GetReasoningConfigurationSummary(cfg *config.RouterConfig) map[string]interface{} {
- summary := make(map[string]interface{})
-
- reasoningEnabled := 0
- reasoningDisabled := 0
-
- categoriesWithReasoning := []string{}
- categoriesWithoutReasoning := []string{}
-
- for _, category := range cfg.Categories {
- bestModelReasoning := false
- if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
- bestModelReasoning = *category.ModelScores[0].UseReasoning
- }
-
- if bestModelReasoning {
- reasoningEnabled++
- categoriesWithReasoning = append(categoriesWithReasoning, category.Name)
- } else {
- reasoningDisabled++
- categoriesWithoutReasoning = append(categoriesWithoutReasoning, category.Name)
- }
- }
-
- summary["total_categories"] = len(cfg.Categories)
- summary["reasoning_enabled_count"] = reasoningEnabled
- summary["reasoning_disabled_count"] = reasoningDisabled
- summary["categories_with_reasoning"] = categoriesWithReasoning
- summary["categories_without_reasoning"] = categoriesWithoutReasoning
-
- return summary
-}
-
// DemonstrateConfigurationUsage shows how to use the configuration-based reasoning
func DemonstrateConfigurationUsage() {
fmt.Println("=== Configuration Usage Example ===")
diff --git a/src/semantic-router/pkg/extproc/processor_req_body.go b/src/semantic-router/pkg/extproc/processor_req_body.go
index 8e7e2d16b..35de4bfd1 100644
--- a/src/semantic-router/pkg/extproc/processor_req_body.go
+++ b/src/semantic-router/pkg/extproc/processor_req_body.go
@@ -64,46 +64,34 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
// Get content from messages
userContent, nonUserMessages := extractUserAndNonUserContent(openAIRequest)
- // Perform classification and model selection once at the beginning
- categoryName, classificationConfidence, reasoningDecision, selectedModel := r.performClassificationAndModelSelection(originalModel, userContent, nonUserMessages)
+ // Perform decision evaluation and model selection once at the beginning
+ // Use decision-based routing if decisions are configured, otherwise fall back to category-based
+ decisionName, classificationConfidence, reasoningDecision, selectedModel := r.performDecisionEvaluationAndModelSelection(originalModel, userContent, nonUserMessages, ctx)
- // Perform security checks with category-specific settings
- if response, shouldReturn := r.performSecurityChecks(ctx, userContent, nonUserMessages, categoryName); shouldReturn {
+ // Perform security checks with decision-specific settings
+ if response, shouldReturn := r.performSecurityChecks(ctx, userContent, nonUserMessages, decisionName); shouldReturn {
return response, nil
}
- // Perform PII detection and policy check (if PII policy is enabled for the category)
- // For auto models: this may modify selectedModel if the initially selected model violates PII policy
- // For non-auto models: this checks if the specified model passes PII policy
- isAutoModel := r.Config != nil && r.Config.IsAutoModelName(originalModel)
- modelToCheck := selectedModel
- if !isAutoModel || selectedModel == "" {
- // For non-auto models or when no model was selected, check the original model
- modelToCheck = originalModel
- }
-
- allowedModel, piiResponse := r.performPIIDetection(ctx, userContent, nonUserMessages, categoryName, modelToCheck, isAutoModel)
+ // Perform PII detection and policy check (if PII policy is enabled for the decision)
+ piiResponse := r.performPIIDetection(ctx, userContent, nonUserMessages, decisionName)
if piiResponse != nil {
// PII policy violation - return error response
return piiResponse, nil
}
- // Use the allowed model (may be different from selectedModel if PII policy required a change)
- if allowedModel != "" {
- selectedModel = allowedModel
- }
- // Handle caching with category-specific settings
- if response, shouldReturn := r.handleCaching(ctx, categoryName); shouldReturn {
+ // Handle caching with decision-specific settings
+ if response, shouldReturn := r.handleCaching(ctx, decisionName); shouldReturn {
return response, nil
}
// Handle model selection and routing with pre-computed classification results and selected model
- return r.handleModelRouting(openAIRequest, originalModel, categoryName, classificationConfidence, reasoningDecision, selectedModel, ctx)
+ return r.handleModelRouting(openAIRequest, originalModel, decisionName, classificationConfidence, reasoningDecision, selectedModel, ctx)
}
// handleModelRouting handles model selection and routing logic
-// categoryName, classificationConfidence, reasoningDecision, and selectedModel are pre-computed from ProcessRequest
-func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, categoryName string, classificationConfidence float64, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext) (*ext_proc.ProcessingResponse, error) {
+// decisionName, classificationConfidence, reasoningDecision, and selectedModel are pre-computed from ProcessRequest
+func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, decisionName string, classificationConfidence float64, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext) (*ext_proc.ProcessingResponse, error) {
response := &ext_proc.ProcessingResponse{
Response: &ext_proc.ProcessingResponse_RequestBody{
RequestBody: &ext_proc.BodyResponse{
@@ -117,7 +105,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
isAutoModel := r.Config != nil && r.Config.IsAutoModelName(originalModel)
if isAutoModel && selectedModel != "" {
- return r.handleAutoModelRouting(openAIRequest, originalModel, categoryName, reasoningDecision, selectedModel, ctx, response)
+ return r.handleAutoModelRouting(openAIRequest, originalModel, decisionName, reasoningDecision, selectedModel, ctx, response)
} else if !isAutoModel {
return r.handleSpecifiedModelRouting(openAIRequest, originalModel, ctx)
}
@@ -128,9 +116,9 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
}
// handleAutoModelRouting handles routing for auto model selection
-func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, categoryName string, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext, response *ext_proc.ProcessingResponse) (*ext_proc.ProcessingResponse, error) {
- logging.Infof("Using Auto Model Selection (model=%s), category=%s, selected=%s",
- originalModel, categoryName, selectedModel)
+func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, decisionName string, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext, response *ext_proc.ProcessingResponse) (*ext_proc.ProcessingResponse, error) {
+ logging.Infof("Using Auto Model Selection (model=%s), decision=%s, selected=%s",
+ originalModel, decisionName, selectedModel)
matchedModel := selectedModel
@@ -141,10 +129,11 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
}
// Record routing decision with tracing
- r.recordRoutingDecision(ctx, categoryName, originalModel, matchedModel, reasoningDecision)
+ r.recordRoutingDecision(ctx, decisionName, originalModel, matchedModel, reasoningDecision)
// Track VSR decision information
- r.trackVSRDecision(ctx, categoryName, matchedModel, reasoningDecision.UseReasoning)
+ // categoryName is already set in ctx.VSRSelectedCategory by performDecisionEvaluationAndModelSelection
+ r.trackVSRDecision(ctx, ctx.VSRSelectedCategory, decisionName, matchedModel, reasoningDecision.UseReasoning)
// Track model routing metrics
metrics.RecordModelRouting(originalModel, matchedModel)
@@ -153,16 +142,16 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
selectedEndpoint := r.selectEndpointForModel(ctx, matchedModel)
// Modify request body with new model, reasoning mode, and system prompt
- modifiedBody, err := r.modifyRequestBodyForAutoRouting(openAIRequest, matchedModel, categoryName, reasoningDecision.UseReasoning, ctx)
+ modifiedBody, err := r.modifyRequestBodyForAutoRouting(openAIRequest, matchedModel, decisionName, reasoningDecision.UseReasoning, ctx)
if err != nil {
return nil, err
}
// Create response with mutations
- response = r.createRoutingResponse(matchedModel, selectedEndpoint, modifiedBody)
+ response = r.createRoutingResponse(matchedModel, selectedEndpoint, modifiedBody, ctx)
// Log routing decision
- r.logRoutingDecision(ctx, "auto_routing", originalModel, matchedModel, categoryName, reasoningDecision.UseReasoning, selectedEndpoint)
+ r.logRoutingDecision(ctx, "auto_routing", originalModel, matchedModel, decisionName, reasoningDecision.UseReasoning, selectedEndpoint)
// Handle route cache clearing
if r.shouldClearRouteCache() {
@@ -241,7 +230,7 @@ func (r *OpenAIRouter) selectEndpointForModel(ctx *RequestContext, model string)
}
// modifyRequestBodyForAutoRouting modifies the request body for auto routing
-func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.ChatCompletionNewParams, matchedModel string, categoryName string, useReasoning bool, ctx *RequestContext) ([]byte, error) {
+func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.ChatCompletionNewParams, matchedModel string, decisionName string, useReasoning bool, ctx *RequestContext) ([]byte, error) {
// Modify the model in the request
openAIRequest.Model = matchedModel
@@ -253,19 +242,19 @@ func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.Cha
return nil, status.Errorf(codes.Internal, "error serializing modified request: %v", err)
}
- if categoryName == "" {
+ if decisionName == "" {
return modifiedBody, nil
}
// Set reasoning mode
- modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, categoryName)
+ modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, decisionName)
if err != nil {
logging.Errorf("Error setting reasoning mode %v to request: %v", useReasoning, err)
metrics.RecordRequestError(matchedModel, "serialization_error")
return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)
}
- // Add category-specific system prompt if configured
- modifiedBody, err = r.addSystemPromptIfConfigured(modifiedBody, categoryName, matchedModel, ctx)
+ // Add decision-specific system prompt if configured
+ modifiedBody, err = r.addSystemPromptIfConfigured(modifiedBody, decisionName, matchedModel, ctx)
if err != nil {
return nil, err
}
@@ -274,7 +263,7 @@ func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.Cha
}
// createRoutingResponse creates a routing response with mutations
-func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modifiedBody []byte) *ext_proc.ProcessingResponse {
+func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modifiedBody []byte, ctx *RequestContext) *ext_proc.ProcessingResponse {
bodyMutation := &ext_proc.BodyMutation{
Mutation: &ext_proc.BodyMutation_Body{
Body: modifiedBody,
@@ -282,6 +271,9 @@ func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modi
}
setHeaders := []*core.HeaderValueOption{}
+ removeHeaders := []string{"content-length"}
+
+ // Add standard routing headers
if endpoint != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
@@ -299,8 +291,21 @@ func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modi
})
}
+ // Apply header mutations from decision's header_mutation plugin
+ if ctx.VSRSelectedDecision != nil {
+ pluginSetHeaders, pluginRemoveHeaders := r.buildHeaderMutations(ctx.VSRSelectedDecision)
+ if len(pluginSetHeaders) > 0 {
+ setHeaders = append(setHeaders, pluginSetHeaders...)
+ logging.Infof("Applied %d header mutations from decision %s", len(pluginSetHeaders), ctx.VSRSelectedDecision.Name)
+ }
+ if len(pluginRemoveHeaders) > 0 {
+ removeHeaders = append(removeHeaders, pluginRemoveHeaders...)
+ logging.Infof("Applied %d header deletions from decision %s", len(pluginRemoveHeaders), ctx.VSRSelectedDecision.Name)
+ }
+ }
+
headerMutation := &ext_proc.HeaderMutation{
- RemoveHeaders: []string{"content-length"},
+ RemoveHeaders: removeHeaders,
SetHeaders: setHeaders,
}
diff --git a/src/semantic-router/pkg/extproc/processor_req_header.go b/src/semantic-router/pkg/extproc/processor_req_header.go
index ba3dad431..e0fce4ac7 100644
--- a/src/semantic-router/pkg/extproc/processor_req_header.go
+++ b/src/semantic-router/pkg/extproc/processor_req_header.go
@@ -9,6 +9,7 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/tracing"
@@ -33,11 +34,13 @@ type RequestContext struct {
TTFTSeconds float64
// VSR decision tracking
- VSRSelectedCategory string // The category selected by VSR
- VSRReasoningMode string // "on" or "off" - whether reasoning mode was determined to be used
- VSRSelectedModel string // The model selected by VSR
- VSRCacheHit bool // Whether this request hit the cache
- VSRInjectedSystemPrompt bool // Whether a system prompt was injected into the request
+ VSRSelectedCategory string // The category from domain classification (MMLU category)
+ VSRSelectedDecisionName string // The decision name from DecisionEngine evaluation
+ VSRReasoningMode string // "on" or "off" - whether reasoning mode was determined to be used
+ VSRSelectedModel string // The model selected by VSR
+ VSRCacheHit bool // Whether this request hit the cache
+ VSRInjectedSystemPrompt bool // Whether a system prompt was injected into the request
+ VSRSelectedDecision *config.Decision // The decision object selected by DecisionEngine (for plugins)
// Tracing context
TraceContext context.Context // OpenTelemetry trace context for span propagation
diff --git a/src/semantic-router/pkg/extproc/processor_res_header.go b/src/semantic-router/pkg/extproc/processor_res_header.go
index 3d65720cf..068a89a48 100644
--- a/src/semantic-router/pkg/extproc/processor_res_header.go
+++ b/src/semantic-router/pkg/extproc/processor_res_header.go
@@ -54,7 +54,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
if isSuccessful && !ctx.VSRCacheHit && ctx != nil {
var setHeaders []*core.HeaderValueOption
- // Add x-vsr-selected-category header
+ // Add x-vsr-selected-category header (from domain classification)
if ctx.VSRSelectedCategory != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
@@ -64,6 +64,16 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
})
}
+ // Add x-vsr-selected-decision header (from decision evaluation)
+ if ctx.VSRSelectedDecisionName != "" {
+ setHeaders = append(setHeaders, &core.HeaderValueOption{
+ Header: &core.HeaderValue{
+ Key: headers.VSRSelectedDecision,
+ RawValue: []byte(ctx.VSRSelectedDecisionName),
+ },
+ })
+ }
+
// Add x-vsr-selected-reasoning header
if ctx.VSRReasoningMode != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
diff --git a/src/semantic-router/pkg/extproc/recorder.go b/src/semantic-router/pkg/extproc/recorder.go
index 4ff1e4ad9..1211d9b7a 100644
--- a/src/semantic-router/pkg/extproc/recorder.go
+++ b/src/semantic-router/pkg/extproc/recorder.go
@@ -13,10 +13,10 @@ import (
)
// logRoutingDecision logs routing decision with structured logging
-func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string, originalModel string, selectedModel string, categoryName string, reasoningEnabled bool, endpoint string) {
+func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string, originalModel string, selectedModel string, decisionName string, reasoningEnabled bool, endpoint string) {
effortForMetrics := ""
- if reasoningEnabled && categoryName != "" {
- effortForMetrics = r.getReasoningEffort(categoryName, selectedModel)
+ if reasoningEnabled && decisionName != "" {
+ effortForMetrics = r.getReasoningEffort(decisionName, selectedModel)
}
logging.LogEvent("routing_decision", map[string]interface{}{
@@ -24,7 +24,7 @@ func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string
"request_id": ctx.RequestID,
"original_model": originalModel,
"selected_model": selectedModel,
- "category": categoryName,
+ "decision": decisionName,
"reasoning_enabled": reasoningEnabled,
"reasoning_effort": effortForMetrics,
"selected_endpoint": endpoint,
@@ -34,15 +34,15 @@ func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string
}
// recordRoutingDecision records routing decision with tracing
-func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, categoryName string, originalModel string, matchedModel string, reasoningDecision entropy.ReasoningDecision) {
+func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, decisionName string, originalModel string, matchedModel string, reasoningDecision entropy.ReasoningDecision) {
routingCtx, routingSpan := tracing.StartSpan(ctx.TraceContext, tracing.SpanRoutingDecision)
useReasoning := reasoningDecision.UseReasoning
logging.Infof("Entropy-based reasoning decision for this query: %v on [%s] model (confidence: %.3f, reason: %s)",
useReasoning, matchedModel, reasoningDecision.Confidence, reasoningDecision.DecisionReason)
- effortForMetrics := r.getReasoningEffort(categoryName, matchedModel)
- metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
+ effortForMetrics := r.getReasoningEffort(decisionName, matchedModel)
+ metrics.RecordReasoningDecision(decisionName, matchedModel, useReasoning, effortForMetrics)
tracing.SetSpanAttributes(routingSpan,
attribute.String(tracing.AttrRoutingStrategy, "auto"),
@@ -57,8 +57,11 @@ func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, categoryName s
}
// trackVSRDecision tracks VSR decision information in context
-func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, categoryName string, matchedModel string, useReasoning bool) {
+// categoryName: the category from domain classification (MMLU category)
+// decisionName: the decision name from DecisionEngine evaluation
+func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, categoryName string, decisionName string, matchedModel string, useReasoning bool) {
ctx.VSRSelectedCategory = categoryName
+ ctx.VSRSelectedDecisionName = decisionName
ctx.VSRSelectedModel = matchedModel
if useReasoning {
ctx.VSRReasoningMode = "on"
diff --git a/src/semantic-router/pkg/extproc/req_filter_cache.go b/src/semantic-router/pkg/extproc/req_filter_cache.go
index 4a4f2fcb7..7caed3144 100644
--- a/src/semantic-router/pkg/extproc/req_filter_cache.go
+++ b/src/semantic-router/pkg/extproc/req_filter_cache.go
@@ -25,17 +25,17 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext, categoryName string) (
ctx.RequestModel = requestModel
ctx.RequestQuery = requestQuery
- // Check if caching is enabled for this category
+ // Check if caching is enabled for this decision
cacheEnabled := r.Config.SemanticCache.Enabled
if categoryName != "" {
- cacheEnabled = r.Config.IsCacheEnabledForCategory(categoryName)
+ cacheEnabled = r.Config.IsCacheEnabledForDecision(categoryName)
}
if requestQuery != "" && r.Cache.IsEnabled() && cacheEnabled {
- // Get category-specific threshold
+ // Get decision-specific threshold
threshold := r.Config.GetCacheSimilarityThreshold()
if categoryName != "" {
- threshold = r.Config.GetCacheSimilarityThresholdForCategory(categoryName)
+ threshold = r.Config.GetCacheSimilarityThresholdForDecision(categoryName)
}
// Start cache lookup span
diff --git a/src/semantic-router/pkg/extproc/req_filter_classification.go b/src/semantic-router/pkg/extproc/req_filter_classification.go
index 12f36d9bc..411eb52fc 100644
--- a/src/semantic-router/pkg/extproc/req_filter_classification.go
+++ b/src/semantic-router/pkg/extproc/req_filter_classification.go
@@ -7,17 +7,16 @@ import (
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/entropy"
)
-// extractUserAndNonUserContent extracts user and non-user messages from the request
-
-// performClassificationAndModelSelection performs classification and model selection once
-// Returns (categoryName, confidence, reasoningDecision, selectedModel)
-func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel string, userContent string, nonUserMessages []string) (string, float64, entropy.ReasoningDecision, string) {
- var categoryName string
- var classificationConfidence float64
+// performDecisionEvaluationAndModelSelection performs decision evaluation using DecisionEngine
+// Returns (decisionName, confidence, reasoningDecision, selectedModel)
+// This is the new approach that uses Decision-based routing with AND/OR rule combinations
+func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel string, userContent string, nonUserMessages []string, ctx *RequestContext) (string, float64, entropy.ReasoningDecision, string) {
+ var decisionName string
+ var evaluationConfidence float64
var reasoningDecision entropy.ReasoningDecision
var selectedModel string
- // Only perform classification for auto models with content
+ // Only perform evaluation for auto models with content
if !r.Config.IsAutoModelName(originalModel) {
return "", 0.0, entropy.ReasoningDecision{}, ""
}
@@ -26,40 +25,90 @@ func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel stri
return "", 0.0, entropy.ReasoningDecision{}, ""
}
- // Determine text to use for classification
- classificationText := userContent
- if classificationText == "" && len(nonUserMessages) > 0 {
- classificationText = strings.Join(nonUserMessages, " ")
+ // Check if decisions are configured
+ if len(r.Config.Decisions) == 0 {
+ logging.Warnf("No decisions configured, using default model")
+ return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
+ }
+
+ // Determine text to use for evaluation
+ evaluationText := userContent
+ if evaluationText == "" && len(nonUserMessages) > 0 {
+ evaluationText = strings.Join(nonUserMessages, " ")
}
- if classificationText == "" {
+ if evaluationText == "" {
return "", 0.0, entropy.ReasoningDecision{}, ""
}
- // Perform entropy-based classification once
- catName, confidence, reasoningDec, err := r.Classifier.ClassifyCategoryWithEntropy(classificationText)
+ // Perform decision evaluation using DecisionEngine
+ result, err := r.Classifier.EvaluateDecisionWithEngine(evaluationText)
if err != nil {
- logging.Errorf("Entropy-based classification error: %v, using empty category", err)
- categoryName = ""
- classificationConfidence = 0.0
- reasoningDecision = entropy.ReasoningDecision{}
- } else {
- categoryName = catName
- classificationConfidence = confidence
- reasoningDecision = reasoningDec
- logging.Infof("Classification Result: category=%s, confidence=%.3f, reasoning=%v",
- categoryName, classificationConfidence, reasoningDecision.UseReasoning)
+ logging.Errorf("Decision evaluation error: %v, using default model", err)
+ return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
+ }
+
+ if result == nil || result.Decision == nil {
+ logging.Warnf("No decision matched, using default model")
+ return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
}
- // Select best model for this category
- if categoryName != "" {
- selectedModel = r.Classifier.SelectBestModelForCategory(categoryName)
- logging.Infof("Selected model for category %s: %s", categoryName, selectedModel)
+ // Store the selected decision in context for later use (e.g., header mutations)
+ ctx.VSRSelectedDecision = result.Decision
+
+ // Extract domain category from matched rules (for VSRSelectedCategory header)
+ // MatchedRules contains rule names like "domain:math", "keyword:thinking", etc.
+ // We extract the first domain rule as the category
+ categoryName := ""
+ for _, rule := range result.MatchedRules {
+ if strings.HasPrefix(rule, "domain:") {
+ categoryName = strings.TrimPrefix(rule, "domain:")
+ break
+ }
+ }
+ // Store category in context for response headers
+ ctx.VSRSelectedCategory = categoryName
+
+ decisionName = result.Decision.Name
+ evaluationConfidence = result.Confidence
+ logging.Infof("Decision Evaluation Result: decision=%s, category=%s, confidence=%.3f, matched_rules=%v",
+ decisionName, categoryName, evaluationConfidence, result.MatchedRules)
+
+ // Select best model from the decision's ModelRefs
+ if len(result.Decision.ModelRefs) > 0 {
+ modelRef := result.Decision.ModelRefs[0]
+ // Use LoRA name if specified, otherwise use the base model name
+ selectedModel = modelRef.Model
+ if modelRef.LoRAName != "" {
+ selectedModel = modelRef.LoRAName
+ logging.Infof("Selected model from decision %s: %s (LoRA adapter for base model %s)",
+ decisionName, selectedModel, modelRef.Model)
+ } else {
+ logging.Infof("Selected model from decision %s: %s", decisionName, selectedModel)
+ }
+
+ // Determine reasoning mode from the best model's configuration
+ if result.Decision.ModelRefs[0].UseReasoning != nil {
+ useReasoning := *result.Decision.ModelRefs[0].UseReasoning
+ reasoningDecision = entropy.ReasoningDecision{
+ UseReasoning: useReasoning,
+ Confidence: evaluationConfidence,
+ DecisionReason: "decision_engine_evaluation",
+ FallbackStrategy: "decision_based_routing",
+ TopCategories: []entropy.CategoryProbability{
+ {
+ Category: decisionName,
+ Probability: float32(evaluationConfidence),
+ },
+ },
+ }
+ // Note: ReasoningEffort is handled separately in req_filter_reason.go
+ }
} else {
- // No category found, use default model
+ // No model refs in decision, use default model
selectedModel = r.Config.DefaultModel
- logging.Infof("No category classified, using default model: %s", selectedModel)
+ logging.Infof("No model refs in decision %s, using default model: %s", decisionName, selectedModel)
}
- return categoryName, classificationConfidence, reasoningDecision, selectedModel
+ return decisionName, evaluationConfidence, reasoningDecision, selectedModel
}
diff --git a/src/semantic-router/pkg/extproc/req_filter_header_mutation.go b/src/semantic-router/pkg/extproc/req_filter_header_mutation.go
new file mode 100644
index 000000000..2cd8a90e2
--- /dev/null
+++ b/src/semantic-router/pkg/extproc/req_filter_header_mutation.go
@@ -0,0 +1,59 @@
+package extproc
+
+import (
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
+)
+
+// buildHeaderMutations builds header mutations based on the decision's header_mutation plugin configuration
+// Returns (setHeaders, removeHeaders) to be applied to the request; both are nil when the
+// decision is nil or has no header_mutation plugin configured
+func (r *OpenAIRouter) buildHeaderMutations(decision *config.Decision) ([]*corev3.HeaderValueOption, []string) {
+	if decision == nil {
+		return nil, nil
+	}
+
+	// Get header mutation configuration
+	headerConfig := decision.GetHeaderMutationConfig()
+	if headerConfig == nil {
+		return nil, nil
+	}
+
+	logging.Debugf("Building header mutations for decision %s: add=%d, update=%d, delete=%d",
+		decision.Name, len(headerConfig.Add), len(headerConfig.Update), len(headerConfig.Delete))
+
+	var setHeaders []*corev3.HeaderValueOption
+	var removeHeaders []string
+
+	// Envoy expresses both "add" and "update" as a header set operation, so the
+	// two lists share one code path; only the debug log verb differs.
+	appendSet := func(name, value, verb string) {
+		setHeaders = append(setHeaders, &corev3.HeaderValueOption{
+			Header: &corev3.HeaderValue{
+				Key:      name,
+				RawValue: []byte(value),
+			},
+		})
+		logging.Debugf("%s header: %s=%s", verb, name, value)
+	}
+
+	// Apply additions (add new headers)
+	for _, headerPair := range headerConfig.Add {
+		appendSet(headerPair.Name, headerPair.Value, "Adding")
+	}
+
+	// Apply updates (modify existing headers - in Envoy this is the same as set)
+	for _, headerPair := range headerConfig.Update {
+		appendSet(headerPair.Name, headerPair.Value, "Updating")
+	}
+
+	// Apply deletions
+	for _, headerName := range headerConfig.Delete {
+		removeHeaders = append(removeHeaders, headerName)
+		logging.Debugf("Deleting header: %s", headerName)
+	}
+
+	return setHeaders, removeHeaders
+}
diff --git a/src/semantic-router/pkg/extproc/req_filter_jailbreak.go b/src/semantic-router/pkg/extproc/req_filter_jailbreak.go
index 85ca6ffbf..1b86e0a9c 100644
--- a/src/semantic-router/pkg/extproc/req_filter_jailbreak.go
+++ b/src/semantic-router/pkg/extproc/req_filter_jailbreak.go
@@ -18,17 +18,17 @@ func (r *OpenAIRouter) performSecurityChecks(ctx *RequestContext, userContent st
// Perform PII classification on all message content
allContent := pii.ExtractAllContent(userContent, nonUserMessages)
- // Check if jailbreak detection is enabled for this category
+ // Check if jailbreak detection is enabled for this decision
jailbreakEnabled := r.Classifier.IsJailbreakEnabled()
if categoryName != "" && r.Config != nil {
- // Use category-specific setting if available
- jailbreakEnabled = jailbreakEnabled && r.Config.IsJailbreakEnabledForCategory(categoryName)
+ // Use decision-specific setting if available
+ jailbreakEnabled = jailbreakEnabled && r.Config.IsJailbreakEnabledForDecision(categoryName)
}
- // Get category-specific threshold
+ // Get decision-specific threshold
jailbreakThreshold := r.Config.PromptGuard.Threshold
if categoryName != "" && r.Config != nil {
- jailbreakThreshold = r.Config.GetJailbreakThresholdForCategory(categoryName)
+ jailbreakThreshold = r.Config.GetJailbreakThresholdForDecision(categoryName)
}
// Perform jailbreak detection on all message content
diff --git a/src/semantic-router/pkg/extproc/req_filter_pii.go b/src/semantic-router/pkg/extproc/req_filter_pii.go
index c22542cea..7bd2706bf 100644
--- a/src/semantic-router/pkg/extproc/req_filter_pii.go
+++ b/src/semantic-router/pkg/extproc/req_filter_pii.go
@@ -15,41 +15,49 @@ import (
 )
 // performPIIDetection performs PII detection and policy check
-// Returns (allowedModel, errorResponse).
-// - If errorResponse is not nil, the request should be blocked.
-// - If allowedModel is not empty, it's the model that passes PII policy (may be different from selectedModel)
-// - isAutoModel indicates whether this is an auto model (true) or a specified model (false)
-func (r *OpenAIRouter) performPIIDetection(ctx *RequestContext, userContent string, nonUserMessages []string, categoryName string, selectedModel string, isAutoModel bool) (string, *ext_proc.ProcessingResponse) {
-	// Check if PII detection is enabled for this category
-	if !r.isPIIDetectionEnabled(categoryName) {
-		return selectedModel, nil
+// Returns errorResponse if the request should be blocked, nil otherwise
+func (r *OpenAIRouter) performPIIDetection(ctx *RequestContext, userContent string, nonUserMessages []string, decisionName string) *ext_proc.ProcessingResponse {
+	// Check if PII detection is enabled for this decision
+	if !r.isPIIDetectionEnabled(decisionName) {
+		return nil
 	}
 	// Detect PII in content
-	detectedPII := r.detectPIIWithTracing(ctx, userContent, nonUserMessages, categoryName)
+	detectedPII := r.detectPIIWithTracing(ctx, userContent, nonUserMessages, decisionName)
 	if len(detectedPII) == 0 {
-		return selectedModel, nil
+		return nil
 	}
-	// Check PII policy and find alternative model if needed
-	return r.checkPIIPolicyAndFindAlternative(ctx, selectedModel, detectedPII, categoryName, isAutoModel)
+	// Check PII policy
+	return r.checkPIIPolicy(ctx, detectedPII, decisionName)
 }
-// isPIIDetectionEnabled checks if PII detection is enabled for the given category
-func (r *OpenAIRouter) isPIIDetectionEnabled(categoryName string) bool {
+// isPIIDetectionEnabled checks if PII detection is enabled for the given decision
+func (r *OpenAIRouter) isPIIDetectionEnabled(decisionName string) bool {
+	// Use PIIChecker to check if PII detection is enabled for this decision
+	// This checks if the decision has a PII plugin with enabled: true
+	if !r.PIIChecker.IsPIIEnabled(decisionName) {
+		return false
+	}
+
+	// Also check if there's a valid threshold configured
 	piiThreshold := float32(0.0)
-	if categoryName != "" && r.Config != nil {
-		piiThreshold = r.Config.GetPIIThresholdForCategory(categoryName)
+	if decisionName != "" && r.Config != nil {
+		piiThreshold = r.Config.GetPIIThresholdForDecision(decisionName)
 	} else {
-		piiThreshold = r.Config.PIIModel.Threshold
+		// r.Config may be nil here (e.g., in tests, mirroring the guard in the
+		// branch above); leave the threshold at 0, which disables PII detection.
+		if r.Config != nil {
+			piiThreshold = r.Config.PIIModel.Threshold
+		}
 	}
 	if piiThreshold == 0.0 {
- logging.Infof("PII detection disabled for category: %s", categoryName)
+ logging.Infof("PII detection disabled for decision %s: threshold is 0", decisionName)
return false
}
- logging.Infof("PII detection enabled for category %s (threshold: %.3f)", categoryName, piiThreshold)
+ logging.Infof("PII detection enabled for decision %s (threshold: %.3f)", decisionName, piiThreshold)
return true
}
@@ -83,90 +87,29 @@ func (r *OpenAIRouter) detectPIIWithTracing(ctx *RequestContext, userContent str
return detectedPII
}
-// checkPIIPolicyAndFindAlternative checks if the selected model passes PII policy
-// and finds an alternative model if needed
-func (r *OpenAIRouter) checkPIIPolicyAndFindAlternative(ctx *RequestContext, selectedModel string, detectedPII []string, categoryName string, isAutoModel bool) (string, *ext_proc.ProcessingResponse) {
- // Check if PII policy is enabled for this model
- if selectedModel == "" || !r.PIIChecker.IsPIIEnabled(selectedModel) {
- return selectedModel, nil
- }
-
- // Check if the selected model passes PII policy
- allowed, deniedPII, err := r.PIIChecker.CheckPolicy(selectedModel, detectedPII)
+// checkPIIPolicy checks if the decision allows the detected PII types
+func (r *OpenAIRouter) checkPIIPolicy(ctx *RequestContext, detectedPII []string, decisionName string) *ext_proc.ProcessingResponse {
+ // Check if the decision passes PII policy
+ allowed, deniedPII, err := r.PIIChecker.CheckPolicy(decisionName, detectedPII)
if err != nil {
- logging.Errorf("Error checking PII policy for model %s: %v", selectedModel, err)
- return selectedModel, nil
+ logging.Errorf("Error checking PII policy for decision %s: %v", decisionName, err)
+ return nil
}
if allowed {
- return selectedModel, nil
- }
-
- // Model violates PII policy - find alternative or return error
- logging.Warnf("Model %s violates PII policy, finding alternative", selectedModel)
-
- if isAutoModel && categoryName != "" {
- // For auto models, try to find an alternative model from the same category
- return r.findAlternativeModelForPII(ctx, selectedModel, detectedPII, categoryName)
- }
-
- // For non-auto models, return error (no alternative available)
- return r.createPIIViolationResponse(ctx, selectedModel, deniedPII)
-}
-
-// findAlternativeModelForPII finds an alternative model that passes PII policy
-func (r *OpenAIRouter) findAlternativeModelForPII(ctx *RequestContext, originalModel string, detectedPII []string, categoryName string) (string, *ext_proc.ProcessingResponse) {
- alternativeModels := r.Classifier.GetModelsForCategory(categoryName)
- allowedModels := r.PIIChecker.FilterModelsForPII(alternativeModels, detectedPII)
-
- if len(allowedModels) > 0 {
- // Select the best allowed model from this category
- allowedModel := r.Classifier.SelectBestModelFromList(allowedModels, categoryName)
- logging.Infof("Selected alternative model %s that passes PII policy", allowedModel)
- metrics.RecordRoutingReasonCode("pii_policy_alternative_selected", allowedModel)
- return allowedModel, nil
+ return nil
}
- // No alternative models pass PII policy, try default model
- logging.Warnf("No models in category %s pass PII policy, trying default", categoryName)
- return r.tryDefaultModelForPII(ctx, detectedPII)
-}
-
-// tryDefaultModelForPII tries to use the default model if it passes PII policy
-func (r *OpenAIRouter) tryDefaultModelForPII(ctx *RequestContext, detectedPII []string) (string, *ext_proc.ProcessingResponse) {
- defaultModel := r.Config.DefaultModel
-
- // Check if default model passes policy
- defaultAllowed, defaultDeniedPII, _ := r.PIIChecker.CheckPolicy(defaultModel, detectedPII)
- if defaultAllowed {
- return defaultModel, nil
- }
-
- // Default model also violates PII policy
- logging.Errorf("Default model %s also violates PII policy, returning error", defaultModel)
- logging.LogEvent("routing_block", map[string]interface{}{
- "reason_code": "pii_policy_denied_default_model",
- "request_id": ctx.RequestID,
- "model": defaultModel,
- "denied_pii": defaultDeniedPII,
- })
- metrics.RecordRequestError(defaultModel, "pii_policy_denied")
-
- piiResponse := http.CreatePIIViolationResponse(defaultModel, defaultDeniedPII, ctx.ExpectStreamingResponse)
- return "", piiResponse
-}
-
-// createPIIViolationResponse creates an error response for PII policy violation
-func (r *OpenAIRouter) createPIIViolationResponse(ctx *RequestContext, model string, deniedPII []string) (string, *ext_proc.ProcessingResponse) {
- logging.Warnf("Model %s violates PII policy, returning error", model)
+ // Decision violates PII policy - return error
+ logging.Warnf("Decision %s violates PII policy, blocking request", decisionName)
logging.LogEvent("routing_block", map[string]interface{}{
"reason_code": "pii_policy_denied",
"request_id": ctx.RequestID,
- "model": model,
+ "decision": decisionName,
"denied_pii": deniedPII,
})
- metrics.RecordRequestError(model, "pii_policy_denied")
+ metrics.RecordRequestError(decisionName, "pii_policy_denied")
- piiResponse := http.CreatePIIViolationResponse(model, deniedPII, ctx.ExpectStreamingResponse)
- return "", piiResponse
+ piiResponse := http.CreatePIIViolationResponse(decisionName, deniedPII, ctx.ExpectStreamingResponse)
+ return piiResponse
}
diff --git a/src/semantic-router/pkg/extproc/req_filter_reason.go b/src/semantic-router/pkg/extproc/req_filter_reason.go
index ca9e27f89..e16fe92b2 100644
--- a/src/semantic-router/pkg/extproc/req_filter_reason.go
+++ b/src/semantic-router/pkg/extproc/req_filter_reason.go
@@ -112,22 +112,22 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
return modifiedBody, nil
}
-// getReasoningEffort returns the reasoning effort level for a given category and model
+// getReasoningEffort returns the reasoning effort level for a given decision and model
func (r *OpenAIRouter) getReasoningEffort(categoryName string, modelName string) string {
// Handle case where Config is nil (e.g., in tests)
if r.Config == nil {
return "medium"
}
- // Find the category and model configuration
- for _, category := range r.Config.Categories {
- if category.Name == categoryName {
- // Find the specific model in the category's model scores
- for _, modelScore := range category.ModelScores {
- if modelScore.Model == modelName {
+ // Find the decision and model configuration
+ for _, decision := range r.Config.Decisions {
+ if decision.Name == categoryName {
+ // Find the specific model in the decision's model refs
+ for _, modelRef := range decision.ModelRefs {
+ if modelRef.Model == modelName {
// Use model-specific effort if configured
- if modelScore.ReasoningEffort != "" {
- return modelScore.ReasoningEffort
+ if modelRef.ReasoningEffort != "" {
+ return modelRef.ReasoningEffort
}
break
}
diff --git a/src/semantic-router/pkg/extproc/req_filter_sys_prompt.go b/src/semantic-router/pkg/extproc/req_filter_sys_prompt.go
index 500f0d5b0..2eb3c6340 100644
--- a/src/semantic-router/pkg/extproc/req_filter_sys_prompt.go
+++ b/src/semantic-router/pkg/extproc/req_filter_sys_prompt.go
@@ -19,34 +19,42 @@ func (r *OpenAIRouter) addSystemPromptIfConfigured(modifiedBody []byte, category
 		return modifiedBody, nil
 	}
-	// Try to get the most up-to-date category configuration from global config first
+	// Try to get the most up-to-date decision configuration from global config first
 	globalConfig := config.Get()
-	var category *config.Category
+	var decision *config.Decision
 	if globalConfig != nil {
-		category = globalConfig.GetCategoryByName(categoryName)
+		decision = globalConfig.GetDecisionByName(categoryName)
 	}
 	// If not found in global config, fall back to router's config
-	if category == nil {
-		category = r.Classifier.GetCategoryByName(categoryName)
+	if decision == nil {
+		decision = r.Classifier.GetDecisionByName(categoryName)
 	}
-	if category == nil || category.SystemPrompt == "" {
+	// decision can still be nil when neither config knows this name; check it
+	// BEFORE calling GetSystemPromptConfig to avoid a nil-pointer dereference.
+	if decision == nil {
+		return modifiedBody, nil
+	}
+
+	// Get system prompt configuration from plugins
+	systemPromptConfig := decision.GetSystemPromptConfig()
+	if systemPromptConfig == nil || systemPromptConfig.SystemPrompt == "" {
return modifiedBody, nil
}
- if !category.IsSystemPromptEnabled() {
- logging.Infof("System prompt disabled for category: %s", categoryName)
+ if !decision.IsSystemPromptEnabled() {
+ logging.Infof("System prompt disabled for decision: %s", categoryName)
return modifiedBody, nil
}
// Start system prompt injection span
promptCtx, promptSpan := tracing.StartSpan(ctx.TraceContext, tracing.SpanSystemPromptInjection)
- mode := category.GetSystemPromptMode()
+ mode := decision.GetSystemPromptMode()
var injected bool
var err error
- modifiedBody, injected, err = addSystemPromptToRequestBody(modifiedBody, category.SystemPrompt, mode)
+ modifiedBody, injected, err = addSystemPromptToRequestBody(modifiedBody, systemPromptConfig.SystemPrompt, mode)
if err != nil {
logging.Errorf("Error adding system prompt to request: %v", err)
tracing.RecordError(promptSpan, err)
diff --git a/src/semantic-router/pkg/extproc/router.go b/src/semantic-router/pkg/extproc/router.go
index afee16668..3746ad4cd 100644
--- a/src/semantic-router/pkg/extproc/router.go
+++ b/src/semantic-router/pkg/extproc/router.go
@@ -33,13 +33,25 @@ var _ ext_proc.ExternalProcessorServer = (*OpenAIRouter)(nil)
// NewOpenAIRouter creates a new OpenAI API router instance
func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) {
- // Always parse fresh config for router construction (supports live reload)
- cfg, err := config.Parse(configPath)
- if err != nil {
- return nil, fmt.Errorf("failed to load config: %w", err)
+ var cfg *config.RouterConfig
+ var err error
+
+ // Check if we should use the global config (Kubernetes mode) or parse from file
+ globalCfg := config.Get()
+ if globalCfg != nil && globalCfg.ConfigSource == config.ConfigSourceKubernetes {
+ // Use the global config that's managed by the Kubernetes controller
+ cfg = globalCfg
+ logging.Infof("Using Kubernetes-managed configuration")
+ } else {
+ // Parse fresh config from file for file-based configuration (supports live reload)
+ cfg, err = config.Parse(configPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to load config: %w", err)
+ }
+ // Update global config reference for packages that rely on config.GetConfig()
+ config.Replace(cfg)
+ logging.Debugf("Parsed configuration from file: %s", configPath)
}
- // Update global config reference for packages that rely on config.GetConfig()
- config.Replace(cfg)
// Load category mapping if classifier is enabled
var categoryMapping *classification.CategoryMapping
@@ -134,7 +146,7 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) {
}
// Create utility components
- piiChecker := pii.NewPolicyChecker(cfg, cfg.ModelConfig)
+ piiChecker := pii.NewPolicyChecker(cfg)
classifier, err := classification.NewClassifier(cfg, categoryMapping, piiMapping, jailbreakMapping)
if err != nil {
diff --git a/src/semantic-router/pkg/extproc/server.go b/src/semantic-router/pkg/extproc/server.go
index e9f4a0e45..02474e6a1 100644
--- a/src/semantic-router/pkg/extproc/server.go
+++ b/src/semantic-router/pkg/extproc/server.go
@@ -18,6 +18,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
tlsutil "github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/tls"
)
@@ -158,6 +159,15 @@ func (rs *RouterService) Process(stream ext_proc.ExternalProcessor_ProcessServer
// watchConfigAndReload watches the config file and reloads router on changes.
func (s *Server) watchConfigAndReload(ctx context.Context) {
+ // Check if we're using Kubernetes config source
+ cfg := config.Get()
+ if cfg != nil && cfg.ConfigSource == config.ConfigSourceKubernetes {
+ logging.Infof("ConfigSource is kubernetes, watching for config updates from controller")
+ // Watch for config updates from the Kubernetes controller
+ s.watchKubernetesConfigUpdates(ctx)
+ return
+ }
+
watcher, err := fsnotify.NewWatcher()
if err != nil {
logging.LogEvent("config_watcher_error", map[string]interface{}{
@@ -234,3 +244,35 @@ func (s *Server) watchConfigAndReload(ctx context.Context) {
}
}
}
+
+// watchKubernetesConfigUpdates watches for config updates from the Kubernetes controller
+func (s *Server) watchKubernetesConfigUpdates(ctx context.Context) {
+ updateCh := config.WatchConfigUpdates()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case newCfg := <-updateCh:
+ if newCfg == nil {
+ continue
+ }
+
+ // Build a new router with the updated config
+ // Note: We pass the configPath but NewOpenAIRouter will use the global config
+ newRouter, err := NewOpenAIRouter(s.configPath)
+ if err != nil {
+ logging.LogEvent("config_reload_failed", map[string]interface{}{
+ "source": "kubernetes",
+ "error": err.Error(),
+ })
+ continue
+ }
+
+ s.service.Swap(newRouter)
+ logging.LogEvent("config_reloaded", map[string]interface{}{
+ "source": "kubernetes",
+ })
+ }
+ }
+}
diff --git a/src/semantic-router/pkg/headers/headers.go b/src/semantic-router/pkg/headers/headers.go
index 7ebb2e186..46206ebfb 100644
--- a/src/semantic-router/pkg/headers/headers.go
+++ b/src/semantic-router/pkg/headers/headers.go
@@ -24,10 +24,16 @@ const (
// Vector Semantic Router decision-making information for debugging and monitoring.
// Headers are only added when the request is successful and did not hit the cache.
const (
- // VSRSelectedCategory indicates the category selected by VSR during classification.
+ // VSRSelectedCategory indicates the category selected by VSR during domain classification.
+ // This comes from the domain classifier (MMLU categories).
// Example values: "math", "business", "biology", "computer_science"
VSRSelectedCategory = "x-vsr-selected-category"
+ // VSRSelectedDecision indicates the decision selected by VSR during decision evaluation.
+ // This is the final routing decision made by the DecisionEngine.
+ // Example values: "math_decision", "business_decision", "thinking_decision"
+ VSRSelectedDecision = "x-vsr-selected-decision"
+
// VSRSelectedReasoning indicates whether reasoning mode was determined to be used.
// Values: "on" (reasoning enabled) or "off" (reasoning disabled)
VSRSelectedReasoning = "x-vsr-selected-reasoning"
diff --git a/src/semantic-router/pkg/k8s/controller.go b/src/semantic-router/pkg/k8s/controller.go
new file mode 100644
index 000000000..c6abddba2
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/controller.go
@@ -0,0 +1,88 @@
+/*
+Copyright 2025 vLLM Semantic Router.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package k8s
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
+)
+
+// Controller watches IntelligentPool and IntelligentRoute CRDs and updates configuration
+// This is now a wrapper around the Reconciler for backward compatibility
+type Controller struct {
+	namespace  string
+	reconciler *Reconciler
+	// stopCh is closed exactly once (guarded by stopOnce) to release Start
+	stopCh   chan struct{}
+	stopOnce sync.Once
+}
+
+// ControllerConfig holds configuration for the controller
+type ControllerConfig struct {
+	Namespace      string
+	Kubeconfig     string
+	StaticConfig   *config.RouterConfig
+	OnConfigUpdate func(*config.RouterConfig) error
+}
+
+// NewController creates a new Kubernetes controller using controller-runtime
+// This is now a wrapper around the new Reconciler implementation
+func NewController(cfg ControllerConfig) (*Controller, error) {
+	// Convert ControllerConfig to ReconcilerConfig
+	reconcilerCfg := ReconcilerConfig(cfg)
+
+	reconciler, err := NewReconciler(reconcilerCfg)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create reconciler: %w", err)
+	}
+
+	return &Controller{
+		namespace:  cfg.Namespace,
+		reconciler: reconciler,
+		stopCh:     make(chan struct{}),
+	}, nil
+}
+
+// Start starts the controller and blocks until Stop is called or ctx is cancelled
+func (c *Controller) Start(ctx context.Context) error {
+	if err := c.reconciler.Start(ctx); err != nil {
+		return err
+	}
+
+	// Block until either the caller cancels the context or Stop closes stopCh;
+	// waiting only on stopCh would leak this goroutine on context cancellation.
+	select {
+	case <-ctx.Done():
+	case <-c.stopCh:
+	}
+	logging.Infof("Kubernetes controller stopped")
+	return nil
+}
+
+// Stop stops the controller; safe to call multiple times
+func (c *Controller) Stop() {
+	// sync.Once prevents a panic from double-closing stopCh if Stop is
+	// invoked more than once.
+	c.stopOnce.Do(func() {
+		c.reconciler.Stop()
+		close(c.stopCh)
+	})
+}
diff --git a/src/semantic-router/pkg/k8s/converter.go b/src/semantic-router/pkg/k8s/converter.go
new file mode 100644
index 000000000..12a40bbb6
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/converter.go
@@ -0,0 +1,269 @@
+/*
+Copyright 2025 vLLM Semantic Router.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package k8s
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/apis/vllm.ai/v1alpha1"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+)
+
+// CRDConverter converts Kubernetes CRDs to internal configuration structures
+type CRDConverter struct{}
+
+// NewCRDConverter creates a new CRD converter
+func NewCRDConverter() *CRDConverter {
+ return &CRDConverter{}
+}
+
+// ConvertIntelligentPool converts IntelligentPool CRD to BackendModels config
+func (c *CRDConverter) ConvertIntelligentPool(pool *v1alpha1.IntelligentPool) (*config.BackendModels, error) {
+ if pool == nil {
+ return nil, fmt.Errorf("pool cannot be nil")
+ }
+
+ backendModels := &config.BackendModels{
+ DefaultModel: pool.Spec.DefaultModel,
+ ModelConfig: make(map[string]config.ModelParams),
+ // VLLMEndpoints is not managed by CRD, will be loaded from static config
+ VLLMEndpoints: nil,
+ }
+
+ // Convert models
+ for _, model := range pool.Spec.Models {
+ modelParams := config.ModelParams{
+ ReasoningFamily: model.ReasoningFamily,
+ }
+
+ // Convert pricing
+ if model.Pricing != nil {
+ modelParams.Pricing = config.ModelPricing{
+ PromptPer1M: model.Pricing.InputTokenPrice * 1000000, // Convert per-token to per-1M
+ CompletionPer1M: model.Pricing.OutputTokenPrice * 1000000, // Convert per-token to per-1M
+ }
+ }
+
+ // Convert LoRAs
+ if len(model.LoRAs) > 0 {
+ modelParams.LoRAs = make([]config.LoRAAdapter, len(model.LoRAs))
+ for i, lora := range model.LoRAs {
+ modelParams.LoRAs[i] = config.LoRAAdapter{
+ Name: lora.Name,
+ Description: lora.Description,
+ }
+ }
+ }
+
+ backendModels.ModelConfig[model.Name] = modelParams
+ }
+
+ return backendModels, nil
+}
+
+// ConvertIntelligentRoute converts IntelligentRoute CRD to IntelligentRouting config
+func (c *CRDConverter) ConvertIntelligentRoute(route *v1alpha1.IntelligentRoute) (*config.IntelligentRouting, error) {
+ if route == nil {
+ return nil, fmt.Errorf("route cannot be nil")
+ }
+
+ intelligentRouting := &config.IntelligentRouting{
+ KeywordRules: make([]config.KeywordRule, 0),
+ EmbeddingRules: make([]config.EmbeddingRule, 0),
+ Categories: make([]config.Category, 0),
+ Decisions: make([]config.Decision, 0),
+ Strategy: "priority", // Always use priority strategy
+ }
+
+ // Convert keyword signals
+ for _, signal := range route.Spec.Signals.Keywords {
+ intelligentRouting.KeywordRules = append(intelligentRouting.KeywordRules, config.KeywordRule{
+ Name: signal.Name,
+ Operator: signal.Operator,
+ Keywords: signal.Keywords,
+ CaseSensitive: signal.CaseSensitive,
+ })
+ }
+
+ // Convert embedding signals
+ for _, signal := range route.Spec.Signals.Embeddings {
+ embeddingRule := config.EmbeddingRule{
+ Name: signal.Name,
+ SimilarityThreshold: signal.Threshold,
+ Candidates: signal.Candidates,
+ AggregationMethodConfiged: config.AggregationMethod(signal.AggregationMethod),
+ }
+ intelligentRouting.EmbeddingRules = append(intelligentRouting.EmbeddingRules, embeddingRule)
+ }
+
+ // Convert domain signals to categories (only metadata)
+ // Domains is now an array of DomainSignal with name and description
+ for _, domain := range route.Spec.Signals.Domains {
+ category := config.Category{
+ CategoryMetadata: config.CategoryMetadata{
+ Name: domain.Name,
+ Description: domain.Description,
+ MMLUCategories: []string{domain.Name}, // Single MMLU category
+ },
+ }
+ intelligentRouting.Categories = append(intelligentRouting.Categories, category)
+ }
+
+ // Convert decisions
+ for _, decision := range route.Spec.Decisions {
+ configDecision, err := c.convertDecision(decision)
+ if err != nil {
+ return nil, fmt.Errorf("failed to convert decision %s: %w", decision.Name, err)
+ }
+ intelligentRouting.Decisions = append(intelligentRouting.Decisions, configDecision)
+ }
+
+ return intelligentRouting, nil
+}
+
+// convertDecision converts a CRD Decision to config Decision
+func (c *CRDConverter) convertDecision(decision v1alpha1.Decision) (config.Decision, error) {
+	configDecision := config.Decision{
+		Name:        decision.Name,
+		Description: decision.Description,
+		Priority:    int(decision.Priority),
+		Rules: config.RuleCombination{
+			Operator:   decision.Signals.Operator,
+			Conditions: make([]config.RuleCondition, 0),
+		},
+		ModelRefs: make([]config.ModelRef, 0),
+		Plugins:   make([]config.DecisionPlugin, 0),
+	}
+
+	// Convert signal conditions
+	for _, condition := range decision.Signals.Conditions {
+		configDecision.Rules.Conditions = append(configDecision.Rules.Conditions, config.RuleCondition{
+			Type: condition.Type,
+			Name: condition.Name,
+		})
+	}
+
+	// Convert model refs. Only the first entry is used (single-model routing);
+	// copying it into a local avoids taking the address of a range variable.
+	if len(decision.ModelRefs) > 0 {
+		ms := decision.ModelRefs[0]
+		configDecision.ModelRefs = append(configDecision.ModelRefs, config.ModelRef{
+			Model:    ms.Model,
+			LoRAName: ms.LoRAName,
+			ModelReasoningControl: config.ModelReasoningControl{
+				UseReasoning:         &ms.UseReasoning,
+				ReasoningDescription: ms.ReasoningDescription,
+				ReasoningEffort:      ms.ReasoningEffort,
+			},
+		})
+	}
+
+	// Convert plugins
+	for _, plugin := range decision.Plugins {
+		var pluginConfig any
+		if plugin.Configuration != nil && plugin.Configuration.Raw != nil {
+			// Validate plugin configuration format
+			if err := validatePluginConfiguration(plugin.Type, plugin.Configuration.Raw); err != nil {
+				return config.Decision{}, fmt.Errorf("invalid configuration for plugin %s in decision %s: %w", plugin.Type, decision.Name, err)
+			}
+			// Store the raw bytes from RawExtension
+			// The Get*Config methods will unmarshal this to the appropriate type
+			pluginConfig = plugin.Configuration.Raw
+		}
+		configDecision.Plugins = append(configDecision.Plugins, config.DecisionPlugin{
+			Type:          plugin.Type,
+			Configuration: pluginConfig,
+		})
+	}
+
+	return configDecision, nil
+}
+
+// validatePluginConfiguration validates that plugin configuration matches the expected schema
+func validatePluginConfiguration(pluginType string, rawConfig []byte) error {
+ if len(rawConfig) == 0 {
+ return nil // Empty configuration is allowed
+ }
+
+ switch pluginType {
+ case "semantic-cache":
+ var cfg config.SemanticCachePluginConfig
+ decoder := json.NewDecoder(bytes.NewReader(rawConfig))
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&cfg); err != nil {
+ return fmt.Errorf("failed to unmarshal semantic-cache config: %w", err)
+ }
+
+ case "jailbreak":
+ var cfg config.JailbreakPluginConfig
+ decoder := json.NewDecoder(bytes.NewReader(rawConfig))
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&cfg); err != nil {
+ return fmt.Errorf("failed to unmarshal jailbreak config: %w", err)
+ }
+
+ case "pii":
+ var cfg config.PIIPluginConfig
+ decoder := json.NewDecoder(bytes.NewReader(rawConfig))
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&cfg); err != nil {
+ return fmt.Errorf("failed to unmarshal pii config: %w", err)
+ }
+
+ case "system_prompt":
+ var cfg config.SystemPromptPluginConfig
+ decoder := json.NewDecoder(bytes.NewReader(rawConfig))
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&cfg); err != nil {
+ return fmt.Errorf("failed to unmarshal system_prompt config: %w", err)
+ }
+ // Validate mode if present
+ if cfg.Mode != "" && cfg.Mode != "replace" && cfg.Mode != "insert" {
+ return fmt.Errorf("system_prompt mode must be 'replace' or 'insert', got: %s", cfg.Mode)
+ }
+
+ case "header_mutation":
+ var cfg config.HeaderMutationPluginConfig
+ decoder := json.NewDecoder(bytes.NewReader(rawConfig))
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&cfg); err != nil {
+ return fmt.Errorf("failed to unmarshal header_mutation config: %w", err)
+ }
+ // Validate that at least one operation is specified
+ if len(cfg.Add) == 0 && len(cfg.Update) == 0 && len(cfg.Delete) == 0 {
+ return fmt.Errorf("header_mutation plugin must specify at least one of: add, update, delete")
+ }
+ // Validate header pairs
+ for _, h := range cfg.Add {
+ if h.Name == "" {
+ return fmt.Errorf("header_mutation add: header name cannot be empty")
+ }
+ }
+ for _, h := range cfg.Update {
+ if h.Name == "" {
+ return fmt.Errorf("header_mutation update: header name cannot be empty")
+ }
+ }
+
+ default:
+ return fmt.Errorf("unknown plugin type: %s", pluginType)
+ }
+
+ return nil
+}
diff --git a/src/semantic-router/pkg/k8s/converter_test.go b/src/semantic-router/pkg/k8s/converter_test.go
new file mode 100644
index 000000000..8b5019688
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/converter_test.go
@@ -0,0 +1,465 @@
+package k8s
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"gopkg.in/yaml.v3"
+	k8syaml "k8s.io/apimachinery/pkg/util/yaml"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/apis/vllm.ai/v1alpha1"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+)
+
+// TestConverterWithTestData tests the converter with input/output test data
+// This test reads YAML files from testdata/input, converts them, and writes output to testdata/output
+//
+// It doubles as a golden-file generator: each input file (a multi-document
+// YAML containing one IntelligentPool and one IntelligentRoute) is validated,
+// converted, merged onto testdata/base-config.yaml, and written to
+// testdata/output under the same file name. The generated YAML is then parsed
+// back into config.RouterConfig to confirm it round-trips.
+func TestConverterWithTestData(t *testing.T) {
+ testdataDir := "testdata"
+ inputDir := filepath.Join(testdataDir, "input")
+ outputDir := filepath.Join(testdataDir, "output")
+ baseConfigPath := filepath.Join(testdataDir, "base-config.yaml")
+
+ // Ensure output directory exists
+ err := os.MkdirAll(outputDir, 0o755)
+ require.NoError(t, err, "Failed to create output directory")
+
+ // Load base config (static parts)
+ baseConfigData, err := os.ReadFile(baseConfigPath)
+ require.NoError(t, err, "Failed to read base config file: %s", baseConfigPath)
+
+ var baseConfig config.RouterConfig
+ err = yaml.Unmarshal(baseConfigData, &baseConfig)
+ require.NoError(t, err, "Failed to unmarshal base config")
+
+ // Read all input files
+ inputFiles, err := os.ReadDir(inputDir)
+ require.NoError(t, err, "Failed to read input directory")
+
+ converter := NewCRDConverter()
+
+ for _, inputFile := range inputFiles {
+ // Only .yaml/.yml files are processed; anything else is skipped.
+ if !strings.HasSuffix(inputFile.Name(), ".yaml") && !strings.HasSuffix(inputFile.Name(), ".yml") {
+ continue
+ }
+
+ t.Run(inputFile.Name(), func(t *testing.T) {
+ inputPath := filepath.Join(inputDir, inputFile.Name())
+ outputPath := filepath.Join(outputDir, inputFile.Name())
+
+ // Read input file
+ inputData, err := os.ReadFile(inputPath)
+ require.NoError(t, err, "Failed to read input file: %s", inputPath)
+
+ // Parse YAML documents (pool and route)
+ pool, route, err := parseInputYAML(inputData)
+ require.NoError(t, err, "Failed to parse input YAML: %s", inputPath)
+ require.NotNil(t, pool, "IntelligentPool should not be nil")
+ require.NotNil(t, route, "IntelligentRoute should not be nil")
+
+ // Validate CRDs before conversion, mirroring the controller's checks.
+ err = validateCRDs(pool, route, &baseConfig)
+ require.NoError(t, err, "CRD validation failed for %s", inputFile.Name())
+
+ // Convert pool to backend models
+ backendModels, err := converter.ConvertIntelligentPool(pool)
+ require.NoError(t, err, "Failed to convert IntelligentPool")
+
+ // Convert route to intelligent routing
+ intelligentRouting, err := converter.ConvertIntelligentRoute(route)
+ require.NoError(t, err, "Failed to convert IntelligentRoute")
+
+ // Merge base config with CRD-derived config
+ outputConfig := mergeConfigs(&baseConfig, backendModels, intelligentRouting)
+
+ // Convert plugin configurations from []byte to map for YAML serialization
+ normalizePluginConfigurations(outputConfig)
+
+ // Marshal to YAML with 2-space indentation
+ var buf strings.Builder
+ encoder := yaml.NewEncoder(&buf)
+ encoder.SetIndent(2) // Set 2-space indentation to match yamllint config
+ err = encoder.Encode(outputConfig)
+ require.NoError(t, err, "Failed to marshal output config")
+ encoder.Close()
+
+ // Write output file
+ err = os.WriteFile(outputPath, []byte(buf.String()), 0o644)
+ require.NoError(t, err, "Failed to write output file: %s", outputPath)
+
+ t.Logf("Generated output file: %s", outputPath)
+
+ // Validate the output can be unmarshaled back
+ var validateConfig config.RouterConfig
+ err = yaml.Unmarshal([]byte(buf.String()), &validateConfig)
+ require.NoError(t, err, "Failed to unmarshal generated output")
+
+ // Basic validation: the round-tripped config must retain the same
+ // number of models and decisions that the converters produced.
+ assert.NotNil(t, validateConfig.BackendModels, "BackendModels should not be nil")
+ assert.NotNil(t, validateConfig.IntelligentRouting, "IntelligentRouting should not be nil")
+ assert.Len(t, validateConfig.BackendModels.ModelConfig, len(backendModels.ModelConfig), "BackendModels count mismatch")
+ assert.Len(t, validateConfig.IntelligentRouting.Decisions, len(intelligentRouting.Decisions), "Decisions count mismatch")
+ })
+ }
+}
+
+// mergeConfigs merges base config with CRD-derived dynamic parts.
+// The result keeps every static field from baseConfig, swaps in the
+// CRD-derived backend models, and copies the routing rule sets while leaving
+// the base ReasoningConfig (ReasoningFamilies, DefaultReasoningEffort) intact.
+func mergeConfigs(baseConfig *config.RouterConfig, backendModels *config.BackendModels, intelligentRouting *config.IntelligentRouting) *config.RouterConfig {
+	// A shallow copy of the base config carries over all static settings.
+	result := *baseConfig
+
+	// Mark the config as Kubernetes-sourced and install the CRD-derived models.
+	result.ConfigSource = config.ConfigSourceKubernetes
+	result.BackendModels = *backendModels
+
+	// Copy routing rule sets field by field (rather than assigning the whole
+	// struct) so the ReasoningConfig from the base config is preserved.
+	result.IntelligentRouting.KeywordRules = intelligentRouting.KeywordRules
+	result.IntelligentRouting.EmbeddingRules = intelligentRouting.EmbeddingRules
+	result.IntelligentRouting.Categories = intelligentRouting.Categories
+	result.IntelligentRouting.Decisions = intelligentRouting.Decisions
+	result.IntelligentRouting.Strategy = intelligentRouting.Strategy
+
+	return &result
+}
+
+// parseInputYAML parses a multi-document YAML file containing IntelligentPool
+// and IntelligentRoute.
+//
+// Documents without a "kind" field, or with an unrecognized kind, are skipped;
+// either returned pointer may be nil if the corresponding kind is absent.
+// Each document is re-marshaled to JSON before unmarshaling into the typed
+// CRD structs because the API types carry JSON struct tags.
+func parseInputYAML(data []byte) (*v1alpha1.IntelligentPool, *v1alpha1.IntelligentRoute, error) {
+	decoder := k8syaml.NewYAMLOrJSONDecoder(strings.NewReader(string(data)), 4096)
+
+	var pool *v1alpha1.IntelligentPool
+	var route *v1alpha1.IntelligentRoute
+
+	for {
+		var obj map[string]interface{}
+		err := decoder.Decode(&obj)
+		if err != nil {
+			// io.EOF signals the end of the stream. Use errors.Is rather than
+			// matching on the error text: a genuine parse error such as
+			// "unexpected EOF" would otherwise be silently swallowed.
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			return nil, nil, err
+		}
+
+		// Empty documents (e.g. a trailing "---") decode to nil.
+		if obj == nil {
+			continue
+		}
+
+		kind, ok := obj["kind"].(string)
+		if !ok {
+			continue
+		}
+
+		// Re-marshal to JSON (runtime.RawExtension expects JSON)
+		objData, err := json.Marshal(obj)
+		if err != nil {
+			return nil, nil, err
+		}
+
+		switch kind {
+		case "IntelligentPool":
+			pool = &v1alpha1.IntelligentPool{}
+			if err := json.Unmarshal(objData, pool); err != nil {
+				return nil, nil, err
+			}
+		case "IntelligentRoute":
+			route = &v1alpha1.IntelligentRoute{}
+			if err := json.Unmarshal(objData, route); err != nil {
+				return nil, nil, err
+			}
+		}
+	}
+
+	return pool, route, nil
+}
+
+// normalizePluginConfigurations converts plugin configurations from []byte
+// (the raw form produced by Kubernetes RawExtension) into
+// map[string]interface{} so they serialize as structured YAML rather than a
+// byte blob. Configurations that are not []byte, or whose bytes are not valid
+// JSON, are left untouched.
+func normalizePluginConfigurations(cfg *config.RouterConfig) {
+	decisions := cfg.IntelligentRouting.Decisions
+	for i := range decisions {
+		plugins := decisions[i].Plugins
+		for j := range plugins {
+			raw, ok := plugins[j].Configuration.([]byte)
+			if !ok {
+				continue
+			}
+			var asMap map[string]interface{}
+			// On JSON decode failure the original bytes are kept as-is.
+			if err := json.Unmarshal(raw, &asMap); err == nil {
+				plugins[j].Configuration = asMap
+			}
+		}
+	}
+}
+
+// validateCRDs validates IntelligentPool and IntelligentRoute CRDs
+// This mirrors the validation logic in controller.go
+//
+// Checks performed, in order (the first failure is returned, and
+// TestCRDValidationErrors asserts on these exact messages):
+//  1. no duplicate keyword/embedding/domain signal names
+//  2. every decision condition references a declared signal of its type
+//  3. every decision model reference (and optional LoRA name) exists in the pool
+//  4. every model's reasoning family exists in staticConfig (when provided)
+//
+// NOTE(review): condition types other than keyword/embedding/domain are not
+// rejected here — presumably the CRD schema constrains Type; verify upstream.
+func validateCRDs(pool *v1alpha1.IntelligentPool, route *v1alpha1.IntelligentRoute, staticConfig *config.RouterConfig) error {
+ // Build model map for O(1) lookups when validating decision model refs.
+ modelMap := make(map[string]*v1alpha1.ModelConfig)
+ for i := range pool.Spec.Models {
+ model := &pool.Spec.Models[i]
+ modelMap[model.Name] = model
+ }
+
+ // Build signal name sets
+ keywordSignalNames := make(map[string]bool)
+ embeddingSignalNames := make(map[string]bool)
+ domainSignalNames := make(map[string]bool)
+
+ // Check for duplicate keyword signals
+ for _, signal := range route.Spec.Signals.Keywords {
+ if keywordSignalNames[signal.Name] {
+ return fmt.Errorf("duplicate keyword signal name: %s", signal.Name)
+ }
+ keywordSignalNames[signal.Name] = true
+ }
+
+ // Check for duplicate embedding signals
+ for _, signal := range route.Spec.Signals.Embeddings {
+ if embeddingSignalNames[signal.Name] {
+ return fmt.Errorf("duplicate embedding signal name: %s", signal.Name)
+ }
+ embeddingSignalNames[signal.Name] = true
+ }
+
+ // Check for duplicate domain signals
+ for _, domain := range route.Spec.Signals.Domains {
+ if domainSignalNames[domain.Name] {
+ return fmt.Errorf("duplicate domain signal name: %s", domain.Name)
+ }
+ domainSignalNames[domain.Name] = true
+ }
+
+ // Validate decisions
+ for _, decision := range route.Spec.Decisions {
+ // Validate signal references: each condition must name a signal that was
+ // declared above for the matching type.
+ for _, condition := range decision.Signals.Conditions {
+ switch condition.Type {
+ case "keyword":
+ if !keywordSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown keyword signal: %s", decision.Name, condition.Name)
+ }
+ case "embedding":
+ if !embeddingSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown embedding signal: %s", decision.Name, condition.Name)
+ }
+ case "domain":
+ if !domainSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown domain signal: %s", decision.Name, condition.Name)
+ }
+ }
+ }
+
+ // Validate model references
+ for _, ms := range decision.ModelRefs {
+ model, ok := modelMap[ms.Model]
+ if !ok {
+ return fmt.Errorf("decision %s references unknown model: %s", decision.Name, ms.Model)
+ }
+
+ // Validate LoRA reference (only when one is named): it must be one of
+ // the LoRAs declared on the referenced model.
+ if ms.LoRAName != "" {
+ found := false
+ for _, lora := range model.LoRAs {
+ if lora.Name == ms.LoRAName {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return fmt.Errorf("decision %s references unknown LoRA %s for model %s", decision.Name, ms.LoRAName, ms.Model)
+ }
+ }
+ }
+ }
+
+ // Validate reasoning families against the static config; skipped entirely
+ // when no static config (or no families) is supplied.
+ if staticConfig != nil && staticConfig.ReasoningFamilies != nil {
+ for _, model := range pool.Spec.Models {
+ if model.ReasoningFamily != "" {
+ if _, ok := staticConfig.ReasoningFamilies[model.ReasoningFamily]; !ok {
+ return fmt.Errorf("model %s references unknown reasoning family: %s", model.Name, model.ReasoningFamily)
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+// TestCRDValidationErrors tests that validation catches various error conditions
+//
+// Each subtest builds a minimal IntelligentPool/IntelligentRoute pair with a
+// single deliberate defect and asserts that validateCRDs fails with the
+// expected error message. The shared baseConfig provides one reasoning family
+// ("qwen3") so reasoning-family validation is active.
+func TestCRDValidationErrors(t *testing.T) {
+ baseConfig := &config.RouterConfig{
+ IntelligentRouting: config.IntelligentRouting{
+ ReasoningConfig: config.ReasoningConfig{
+ ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
+ "qwen3": {
+ Type: "chat_template_kwargs",
+ Parameter: "enable_thinking",
+ },
+ },
+ },
+ },
+ }
+
+ // Two keyword signals with the same name must be rejected.
+ t.Run("DuplicateKeywordSignal", func(t *testing.T) {
+ pool := &v1alpha1.IntelligentPool{
+ Spec: v1alpha1.IntelligentPoolSpec{
+ DefaultModel: "test-model",
+ Models: []v1alpha1.ModelConfig{
+ {Name: "test-model"},
+ },
+ },
+ }
+
+ route := &v1alpha1.IntelligentRoute{
+ Spec: v1alpha1.IntelligentRouteSpec{
+ Signals: v1alpha1.Signals{
+ Keywords: []v1alpha1.KeywordSignal{
+ {Name: "urgent", Operator: "OR", Keywords: []string{"urgent"}},
+ {Name: "urgent", Operator: "OR", Keywords: []string{"critical"}}, // Duplicate!
+ },
+ },
+ Decisions: []v1alpha1.Decision{},
+ },
+ }
+
+ err := validateCRDs(pool, route, baseConfig)
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "duplicate keyword signal name: urgent")
+ })
+
+ // A decision condition naming an undeclared keyword signal must be rejected.
+ t.Run("UnknownKeywordSignalReference", func(t *testing.T) {
+ pool := &v1alpha1.IntelligentPool{
+ Spec: v1alpha1.IntelligentPoolSpec{
+ DefaultModel: "test-model",
+ Models: []v1alpha1.ModelConfig{
+ {Name: "test-model"},
+ },
+ },
+ }
+
+ route := &v1alpha1.IntelligentRoute{
+ Spec: v1alpha1.IntelligentRouteSpec{
+ Signals: v1alpha1.Signals{
+ Keywords: []v1alpha1.KeywordSignal{
+ {Name: "urgent", Operator: "OR", Keywords: []string{"urgent"}},
+ },
+ },
+ Decisions: []v1alpha1.Decision{
+ {
+ Name: "test-decision",
+ Priority: 100,
+ Signals: v1alpha1.SignalCombination{
+ Operator: "AND",
+ Conditions: []v1alpha1.SignalCondition{
+ {Type: "keyword", Name: "nonexistent"}, // Unknown signal!
+ },
+ },
+ ModelRefs: []v1alpha1.ModelRef{
+ {Model: "test-model"},
+ },
+ },
+ },
+ },
+ }
+
+ err := validateCRDs(pool, route, baseConfig)
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "references unknown keyword signal: nonexistent")
+ })
+
+ // A decision referencing a model absent from the pool must be rejected.
+ t.Run("UnknownModelReference", func(t *testing.T) {
+ pool := &v1alpha1.IntelligentPool{
+ Spec: v1alpha1.IntelligentPoolSpec{
+ DefaultModel: "test-model",
+ Models: []v1alpha1.ModelConfig{
+ {Name: "test-model"},
+ },
+ },
+ }
+
+ route := &v1alpha1.IntelligentRoute{
+ Spec: v1alpha1.IntelligentRouteSpec{
+ Signals: v1alpha1.Signals{
+ Keywords: []v1alpha1.KeywordSignal{
+ {Name: "urgent", Operator: "OR", Keywords: []string{"urgent"}},
+ },
+ },
+ Decisions: []v1alpha1.Decision{
+ {
+ Name: "test-decision",
+ Priority: 100,
+ Signals: v1alpha1.SignalCombination{
+ Operator: "AND",
+ Conditions: []v1alpha1.SignalCondition{
+ {Type: "keyword", Name: "urgent"},
+ },
+ },
+ ModelRefs: []v1alpha1.ModelRef{
+ {Model: "nonexistent-model"}, // Unknown model!
+ },
+ },
+ },
+ },
+ }
+
+ err := validateCRDs(pool, route, baseConfig)
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "references unknown model: nonexistent-model")
+ })
+
+ // A decision referencing a LoRA not declared on its model must be rejected.
+ t.Run("UnknownLoRAReference", func(t *testing.T) {
+ pool := &v1alpha1.IntelligentPool{
+ Spec: v1alpha1.IntelligentPoolSpec{
+ DefaultModel: "test-model",
+ Models: []v1alpha1.ModelConfig{
+ {
+ Name: "test-model",
+ LoRAs: []v1alpha1.LoRAConfig{
+ {Name: "expert-lora"},
+ },
+ },
+ },
+ },
+ }
+
+ route := &v1alpha1.IntelligentRoute{
+ Spec: v1alpha1.IntelligentRouteSpec{
+ Signals: v1alpha1.Signals{
+ Keywords: []v1alpha1.KeywordSignal{
+ {Name: "urgent", Operator: "OR", Keywords: []string{"urgent"}},
+ },
+ },
+ Decisions: []v1alpha1.Decision{
+ {
+ Name: "test-decision",
+ Priority: 100,
+ Signals: v1alpha1.SignalCombination{
+ Operator: "AND",
+ Conditions: []v1alpha1.SignalCondition{
+ {Type: "keyword", Name: "urgent"},
+ },
+ },
+ ModelRefs: []v1alpha1.ModelRef{
+ {Model: "test-model", LoRAName: "nonexistent-lora"}, // Unknown LoRA!
+ },
+ },
+ },
+ },
+ }
+
+ err := validateCRDs(pool, route, baseConfig)
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "references unknown LoRA nonexistent-lora")
+ })
+}
diff --git a/src/semantic-router/pkg/k8s/reconciler.go b/src/semantic-router/pkg/k8s/reconciler.go
new file mode 100644
index 000000000..ce2fc9fb3
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/reconciler.go
@@ -0,0 +1,453 @@
+/*
+Copyright 2025 vLLM Semantic Router.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package k8s
+
+import (
+ "context"
+ "fmt"
+ "sync"
+ "time"
+
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/client-go/rest"
+ "k8s.io/client-go/tools/clientcmd"
+ "sigs.k8s.io/controller-runtime/pkg/cache"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/manager"
+ "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/apis/vllm.ai/v1alpha1"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
+)
+
+// Reconciler reconciles IntelligentPool and IntelligentRoute CRDs
+// It watches a single namespace; when either CRD's generation changes it
+// validates the pair, converts them to router configuration, and invokes
+// onConfigUpdate with the merged result.
+type Reconciler struct {
+ // client reads CRDs via the manager's informer-backed cache.
+ client client.Client
+ scheme *runtime.Scheme
+ // namespace is the only namespace listed/watched.
+ namespace string
+ converter *CRDConverter
+ // staticConfig supplies the non-CRD parts merged into every update.
+ staticConfig *config.RouterConfig
+ // onConfigUpdate is called with the merged config; an error marks the
+ // CRD status as UpdateFailed.
+ onConfigUpdate func(*config.RouterConfig) error
+ // mu guards lastPool and lastRoute.
+ mu sync.RWMutex
+ // lastPool/lastRoute hold deep copies of the most recently applied CRDs,
+ // used for generation-based change detection.
+ lastPool *v1alpha1.IntelligentPool
+ lastRoute *v1alpha1.IntelligentRoute
+}
+
+// ReconcilerConfig holds configuration for the reconciler
+type ReconcilerConfig struct {
+ // Namespace to watch for IntelligentPool/IntelligentRoute objects.
+ Namespace string
+ Kubeconfig string // Optional: if empty, uses in-cluster config
+ // StaticConfig provides the non-CRD configuration merged into every update.
+ StaticConfig *config.RouterConfig
+ // OnConfigUpdate receives the merged config after successful validation.
+ OnConfigUpdate func(*config.RouterConfig) error
+}
+
+// NewReconciler creates a new reconciler with controller-runtime.
+//
+// It builds a REST config (explicit kubeconfig takes precedence over
+// in-cluster), registers the v1alpha1 types, starts a namespace-scoped
+// manager in the background, and blocks until the informer cache has synced.
+// The sync wait is bounded so a misbehaving API server cannot hang startup
+// forever, and on sync failure the background manager goroutine is cancelled
+// rather than leaked.
+func NewReconciler(cfg ReconcilerConfig) (*Reconciler, error) {
+	// Build REST config
+	var restConfig *rest.Config
+	var err error
+	if cfg.Kubeconfig != "" {
+		restConfig, err = clientcmd.BuildConfigFromFlags("", cfg.Kubeconfig)
+	} else {
+		restConfig, err = rest.InClusterConfig()
+	}
+	if err != nil {
+		return nil, fmt.Errorf("failed to build REST config: %w", err)
+	}
+
+	// Create scheme and register our types
+	scheme := runtime.NewScheme()
+	if err := v1alpha1.AddToScheme(scheme); err != nil {
+		return nil, fmt.Errorf("failed to add v1alpha1 to scheme: %w", err)
+	}
+
+	// Create manager options scoped to the configured namespace.
+	options := manager.Options{
+		Scheme: scheme,
+		Cache: cache.Options{
+			DefaultNamespaces: map[string]cache.Config{
+				cfg.Namespace: {},
+			},
+		},
+		// Disable metrics server to avoid port conflicts
+		Metrics: server.Options{
+			BindAddress: "0", // "0" disables the metrics server
+		},
+	}
+
+	// Create manager
+	mgr, err := manager.New(restConfig, options)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create manager: %w", err)
+	}
+
+	reconciler := &Reconciler{
+		client:         mgr.GetClient(),
+		scheme:         scheme,
+		namespace:      cfg.Namespace,
+		converter:      NewCRDConverter(),
+		staticConfig:   cfg.StaticConfig,
+		onConfigUpdate: cfg.OnConfigUpdate,
+	}
+
+	// Start the manager in a goroutine. Keep a cancel handle so the goroutine
+	// is stopped (not leaked) if cache sync fails below; on success the
+	// manager intentionally runs for the lifetime of the process.
+	mgrCtx, cancelMgr := context.WithCancel(context.Background())
+	go func() {
+		if err := mgr.Start(mgrCtx); err != nil {
+			logging.Errorf("Failed to start manager: %v", err)
+		}
+	}()
+
+	// Wait for the cache to sync, but bound the wait so startup cannot block
+	// indefinitely on an unreachable API server.
+	syncCtx, cancelSync := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancelSync()
+	if !mgr.GetCache().WaitForCacheSync(syncCtx) {
+		cancelMgr() // stop the background manager goroutine before bailing out
+		return nil, fmt.Errorf("failed to wait for cache sync")
+	}
+
+	return reconciler, nil
+}
+
+// Start starts watching for CRD changes: it runs one immediate
+// reconciliation and then launches the background polling loop, which exits
+// when ctx is cancelled.
+func (r *Reconciler) Start(ctx context.Context) error {
+	logging.Infof("Starting Kubernetes reconciler in namespace %s", r.namespace)
+
+	// Initial sync; failure is non-fatal because the watch loop retries.
+	if err := r.reconcile(ctx); err != nil {
+		logging.Warnf("Initial reconciliation failed (will retry on CRD changes): %v", err)
+	}
+
+	// Background watch loop.
+	go r.watchLoop(ctx)
+	return nil
+}
+
+// Stop stops the reconciler
+// NOTE: this method only logs. The watch loop exits when the context passed
+// to Start is cancelled; the controller-runtime manager goroutine is started
+// separately in NewReconciler.
+func (r *Reconciler) Stop() {
+ logging.Infof("Stopping Kubernetes reconciler")
+}
+
+// watchLoop periodically re-runs reconcile until ctx is cancelled.
+// The informer-backed cache updates itself in the background; this loop only
+// compares the cached objects against the last applied generations.
+func (r *Reconciler) watchLoop(ctx context.Context) {
+	// Poll every 5 seconds; reconcile itself is a no-op when nothing changed.
+	ticker := time.NewTicker(5 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			if err := r.reconcile(ctx); err != nil {
+				logging.Debugf("Reconciliation check: %v", err)
+			}
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// reconcile fetches the current IntelligentPool and IntelligentRoute and
+// re-applies configuration only when either object's generation differs from
+// the last successfully applied one.
+func (r *Reconciler) reconcile(ctx context.Context) error {
+	pool, err := r.getIntelligentPool(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get IntelligentPool: %w", err)
+	}
+
+	route, err := r.getIntelligentRoute(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get IntelligentRoute: %w", err)
+	}
+
+	// Spec changes bump metadata.generation; comparing generations lets us
+	// skip work when neither CRD has changed since the last apply.
+	r.mu.RLock()
+	unchanged := r.lastPool != nil && pool.Generation == r.lastPool.Generation &&
+		r.lastRoute != nil && route.Generation == r.lastRoute.Generation
+	r.mu.RUnlock()
+
+	if unchanged {
+		return nil
+	}
+
+	logging.Infof("CRD changes detected, reconciling configuration")
+
+	if err := r.validateAndUpdate(ctx, pool, route); err != nil {
+		return fmt.Errorf("validation/update failed: %w", err)
+	}
+
+	// Remember deep copies of what was applied for the next comparison.
+	r.mu.Lock()
+	r.lastPool = pool.DeepCopy()
+	r.lastRoute = route.DeepCopy()
+	r.mu.Unlock()
+
+	return nil
+}
+
+// getIntelligentPool returns the single IntelligentPool in the watched
+// namespace; exactly one such object is required.
+func (r *Reconciler) getIntelligentPool(ctx context.Context) (*v1alpha1.IntelligentPool, error) {
+	poolList := &v1alpha1.IntelligentPoolList{}
+	if err := r.client.List(ctx, poolList, client.InNamespace(r.namespace)); err != nil {
+		return nil, fmt.Errorf("failed to list IntelligentPools: %w", err)
+	}
+
+	// Enforce the exactly-one-per-namespace invariant.
+	switch n := len(poolList.Items); {
+	case n == 0:
+		return nil, fmt.Errorf("no IntelligentPool found in namespace %s", r.namespace)
+	case n > 1:
+		return nil, fmt.Errorf("multiple IntelligentPools found in namespace %s, expected exactly 1", r.namespace)
+	}
+
+	return &poolList.Items[0], nil
+}
+
+// getIntelligentRoute returns the single IntelligentRoute in the watched
+// namespace; exactly one such object is required.
+func (r *Reconciler) getIntelligentRoute(ctx context.Context) (*v1alpha1.IntelligentRoute, error) {
+	routeList := &v1alpha1.IntelligentRouteList{}
+	if err := r.client.List(ctx, routeList, client.InNamespace(r.namespace)); err != nil {
+		return nil, fmt.Errorf("failed to list IntelligentRoutes: %w", err)
+	}
+
+	// Enforce the exactly-one-per-namespace invariant.
+	switch n := len(routeList.Items); {
+	case n == 0:
+		return nil, fmt.Errorf("no IntelligentRoute found in namespace %s", r.namespace)
+	case n > 1:
+		return nil, fmt.Errorf("multiple IntelligentRoutes found in namespace %s, expected exactly 1", r.namespace)
+	}
+
+	return &routeList.Items[0], nil
+}
+
+// validateAndUpdate validates CRDs and updates configuration
+//
+// Flow: validate -> convert both CRDs -> merge onto staticConfig -> invoke
+// onConfigUpdate -> write Ready/failed status conditions on both CRDs.
+// Status writes are best-effort (errors are logged inside the helpers, not
+// returned), so a failed status update does not abort reconciliation.
+func (r *Reconciler) validateAndUpdate(ctx context.Context, pool *v1alpha1.IntelligentPool, route *v1alpha1.IntelligentRoute) error {
+ // Validate; a failure marks both CRDs Invalid before returning.
+ if err := r.validate(pool, route); err != nil {
+ // Update status to Invalid
+ r.updatePoolStatus(ctx, pool, metav1.ConditionFalse, "ValidationFailed", err.Error())
+ r.updateRouteStatus(ctx, route, metav1.ConditionFalse, "ValidationFailed", err.Error())
+ return err
+ }
+
+ // Convert to internal config
+ backendModels, err := r.converter.ConvertIntelligentPool(pool)
+ if err != nil {
+ return fmt.Errorf("failed to convert IntelligentPool: %w", err)
+ }
+
+ intelligentRouting, err := r.converter.ConvertIntelligentRoute(route)
+ if err != nil {
+ return fmt.Errorf("failed to convert IntelligentRoute: %w", err)
+ }
+
+ // Create new config by merging with static config.
+ // NOTE(review): this is a shallow copy of staticConfig — CRD-derived
+ // BackendModels/IntelligentRouting fully replace the base values here
+ // (unlike the test helper mergeConfigs, which preserves ReasoningConfig).
+ newConfig := *r.staticConfig
+ newConfig.BackendModels = *backendModels
+ newConfig.IntelligentRouting = *intelligentRouting
+
+ // Call update callback; a callback failure marks both CRDs UpdateFailed.
+ if r.onConfigUpdate != nil {
+ if err := r.onConfigUpdate(&newConfig); err != nil {
+ r.updatePoolStatus(ctx, pool, metav1.ConditionFalse, "UpdateFailed", err.Error())
+ r.updateRouteStatus(ctx, route, metav1.ConditionFalse, "UpdateFailed", err.Error())
+ return fmt.Errorf("config update failed: %w", err)
+ }
+ }
+
+ // Update status to Ready
+ r.updatePoolStatus(ctx, pool, metav1.ConditionTrue, "Ready", "Configuration applied successfully")
+ r.updateRouteStatus(ctx, route, metav1.ConditionTrue, "Ready", "Configuration applied successfully")
+
+ logging.Infof("Configuration updated successfully from CRDs")
+ return nil
+}
+
+// validate validates the CRDs
+//
+// Checks performed, in order (first failure wins):
+//  1. no duplicate keyword/embedding/domain signal names
+//  2. every decision condition references a declared signal of its type
+//  3. every decision model reference (and optional LoRA name) exists in the pool
+//  4. every model's reasoning family exists in the static config
+//
+// NOTE(review): this duplicates validateCRDs in converter_test.go — keep the
+// two in sync, or extract a shared helper.
+func (r *Reconciler) validate(pool *v1alpha1.IntelligentPool, route *v1alpha1.IntelligentRoute) error {
+ // Build model map for O(1) lookups when validating decision model refs.
+ modelMap := make(map[string]*v1alpha1.ModelConfig)
+ for i := range pool.Spec.Models {
+ model := &pool.Spec.Models[i]
+ modelMap[model.Name] = model
+ }
+
+ // Build signal name sets
+ keywordSignalNames := make(map[string]bool)
+ embeddingSignalNames := make(map[string]bool)
+ domainSignalNames := make(map[string]bool)
+
+ for _, signal := range route.Spec.Signals.Keywords {
+ if keywordSignalNames[signal.Name] {
+ return fmt.Errorf("duplicate keyword signal name: %s", signal.Name)
+ }
+ keywordSignalNames[signal.Name] = true
+ }
+
+ for _, signal := range route.Spec.Signals.Embeddings {
+ if embeddingSignalNames[signal.Name] {
+ return fmt.Errorf("duplicate embedding signal name: %s", signal.Name)
+ }
+ embeddingSignalNames[signal.Name] = true
+ }
+
+ // Domains is now an array of DomainSignal with name and description
+ for _, domain := range route.Spec.Signals.Domains {
+ if domainSignalNames[domain.Name] {
+ return fmt.Errorf("duplicate domain signal name: %s", domain.Name)
+ }
+ domainSignalNames[domain.Name] = true
+ }
+
+ // Validate decisions
+ for _, decision := range route.Spec.Decisions {
+ // Validate signal references: each condition must name a signal that was
+ // declared above for the matching type. Other condition types are not
+ // rejected here — presumably constrained by the CRD schema; verify.
+ for _, condition := range decision.Signals.Conditions {
+ switch condition.Type {
+ case "keyword":
+ if !keywordSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown keyword signal: %s", decision.Name, condition.Name)
+ }
+ case "embedding":
+ if !embeddingSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown embedding signal: %s", decision.Name, condition.Name)
+ }
+ case "domain":
+ if !domainSignalNames[condition.Name] {
+ return fmt.Errorf("decision %s references unknown domain signal: %s", decision.Name, condition.Name)
+ }
+ }
+ }
+
+ // Validate model scores
+ for _, ms := range decision.ModelRefs {
+ model, ok := modelMap[ms.Model]
+ if !ok {
+ return fmt.Errorf("decision %s references unknown model: %s", decision.Name, ms.Model)
+ }
+
+ // Validate LoRA reference: when named, it must be declared on the model.
+ if ms.LoRAName != "" {
+ found := false
+ for _, lora := range model.LoRAs {
+ if lora.Name == ms.LoRAName {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return fmt.Errorf("decision %s references unknown LoRA %s for model %s", decision.Name, ms.LoRAName, ms.Model)
+ }
+ }
+ }
+ }
+
+ // Validate reasoning families; skipped when the static config provides none.
+ if r.staticConfig != nil && r.staticConfig.ReasoningFamilies != nil {
+ for _, model := range pool.Spec.Models {
+ if model.ReasoningFamily != "" {
+ if _, ok := r.staticConfig.ReasoningFamilies[model.ReasoningFamily]; !ok {
+ return fmt.Errorf("model %s references unknown reasoning family: %s", model.Name, model.ReasoningFamily)
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+// updatePoolStatus sets the "Ready" condition, observedGeneration, and
+// modelCount on the IntelligentPool status subresource.
+//
+// Failures are logged rather than returned: status writes are best-effort and
+// must not block reconciliation. Per Kubernetes API conventions,
+// LastTransitionTime is only reset when the condition's status actually
+// changes; otherwise the previous timestamp is preserved so observers can
+// tell how long the condition has held.
+func (r *Reconciler) updatePoolStatus(ctx context.Context, pool *v1alpha1.IntelligentPool, status metav1.ConditionStatus, reason, message string) {
+	// Work on a copy so the caller's object is untouched.
+	poolCopy := pool.DeepCopy()
+
+	condition := metav1.Condition{
+		Type:               "Ready",
+		Status:             status,
+		Reason:             reason,
+		Message:            message,
+		LastTransitionTime: metav1.Now(),
+		ObservedGeneration: poolCopy.Generation,
+	}
+
+	// Replace an existing "Ready" condition or append a new one.
+	found := false
+	for i, c := range poolCopy.Status.Conditions {
+		if c.Type == "Ready" {
+			if c.Status == status {
+				// Status unchanged: keep the original transition timestamp.
+				condition.LastTransitionTime = c.LastTransitionTime
+			}
+			poolCopy.Status.Conditions[i] = condition
+			found = true
+			break
+		}
+	}
+	if !found {
+		poolCopy.Status.Conditions = append(poolCopy.Status.Conditions, condition)
+	}
+
+	poolCopy.Status.ObservedGeneration = poolCopy.Generation
+	poolCopy.Status.ModelCount = int32(len(poolCopy.Spec.Models)) //nolint:gosec // Model count is unlikely to overflow int32
+
+	// Update status subresource; best-effort.
+	if err := r.client.Status().Update(ctx, poolCopy); err != nil {
+		logging.Errorf("Failed to update IntelligentPool status: %v", err)
+	}
+}
+
+// updateRouteStatus sets the "Ready" condition, observedGeneration, and
+// signal/decision statistics on the IntelligentRoute status subresource.
+//
+// Failures are logged rather than returned: status writes are best-effort and
+// must not block reconciliation. Per Kubernetes API conventions,
+// LastTransitionTime is only reset when the condition's status actually
+// changes; otherwise the previous timestamp is preserved.
+func (r *Reconciler) updateRouteStatus(ctx context.Context, route *v1alpha1.IntelligentRoute, status metav1.ConditionStatus, reason, message string) {
+	// Work on a copy so the caller's object is untouched.
+	routeCopy := route.DeepCopy()
+
+	condition := metav1.Condition{
+		Type:               "Ready",
+		Status:             status,
+		Reason:             reason,
+		Message:            message,
+		LastTransitionTime: metav1.Now(),
+		ObservedGeneration: routeCopy.Generation,
+	}
+
+	// Replace an existing "Ready" condition or append a new one.
+	found := false
+	for i, c := range routeCopy.Status.Conditions {
+		if c.Type == "Ready" {
+			if c.Status == status {
+				// Status unchanged: keep the original transition timestamp.
+				condition.LastTransitionTime = c.LastTransitionTime
+			}
+			routeCopy.Status.Conditions[i] = condition
+			found = true
+			break
+		}
+	}
+	if !found {
+		routeCopy.Status.Conditions = append(routeCopy.Status.Conditions, condition)
+	}
+
+	routeCopy.Status.ObservedGeneration = routeCopy.Generation
+
+	// Update statistics
+	routeCopy.Status.Statistics = &v1alpha1.RouteStatistics{
+		Decisions:  int32(len(routeCopy.Spec.Decisions)),         //nolint:gosec // Decision count is unlikely to overflow int32
+		Keywords:   int32(len(routeCopy.Spec.Signals.Keywords)),  //nolint:gosec // Keyword count is unlikely to overflow int32
+		Embeddings: int32(len(routeCopy.Spec.Signals.Embeddings)), //nolint:gosec // Embedding count is unlikely to overflow int32
+		Domains:    int32(len(routeCopy.Spec.Signals.Domains)),   //nolint:gosec // Domain count is unlikely to overflow int32
+	}
+
+	// Update status subresource; best-effort.
+	if err := r.client.Status().Update(ctx, routeCopy); err != nil {
+		logging.Errorf("Failed to update IntelligentRoute status: %v", err)
+	}
+}
diff --git a/src/semantic-router/pkg/k8s/testdata/README.md b/src/semantic-router/pkg/k8s/testdata/README.md
new file mode 100644
index 000000000..68b2bbd8e
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/README.md
@@ -0,0 +1,167 @@
+# Test Data for CRD Converter
+
+This directory contains test data for the Kubernetes CRD to RouterConfig converter.
+
+## Directory Structure
+
+```
+testdata/
+├── base-config.yaml   # Static base configuration (shared across all tests)
+├── input/             # Input CRD YAML files (IntelligentPool + IntelligentRoute)
+│   ├── 01-basic.yaml
+│   ├── 02-keyword-only.yaml
+│   ├── ...
+│   └── 16-keyword-embedding-domain-no-plugin.yaml
+└── output/            # Generated RouterConfig YAML files
+    ├── 01-basic.yaml
+    ├── 02-keyword-only.yaml
+    ├── ...
+    └── 16-keyword-embedding-domain-no-plugin.yaml
+
+## Base Configuration
+
+`base-config.yaml` contains static configuration that doesn't come from CRDs:
+
+- Reasoning families (deepseek, qwen3, gpt)
+- Default reasoning effort level
+- BERT model configuration
+- Semantic cache settings
+- Tools configuration
+- Prompt guard settings
+- Classifier configuration
+- Router options
+- Embedding models paths
+- API configuration
+- Observability settings
+
+## Test Scenarios Overview
+
+| # | File | Keyword | Embedding | Domain | Plugin | Use Case |
+|---|------|---------|-----------|--------|--------|----------|
+| 1 | 01-basic.yaml | ✓ | ✓ | ✓ | ✓ | Basic comprehensive example |
+| 2 | 02-keyword-only.yaml | ✓ | ✗ | ✗ | ✗ | FAQ detection, greetings |
+| 3 | 03-embedding-only.yaml | ✗ | ✓ | ✗ | ✗ | Customer support, technical issues |
+| 4 | 04-domain-only.yaml | ✗ | ✗ | ✓ | ✗ | STEM queries, subject routing |
+| 5 | 05-keyword-embedding.yaml | ✓ | ✓ | ✗ | ✗ | Urgent support with semantics |
+| 6 | 06-keyword-domain.yaml | ✓ | ✗ | ✓ | ✗ | Academic homework assistance |
+| 7 | 07-domain-embedding.yaml | ✗ | ✓ | ✓ | ✗ | Research queries by domain |
+| 8 | 08-keyword-embedding-domain.yaml | ✓ | ✓ | ✓ | ✗ | Comprehensive tech support |
+| 9 | 09-keyword-plugin.yaml | ✓ | ✗ | ✗ | ✓ | FAQ with caching |
+| 10 | 10-embedding-plugin.yaml | ✗ | ✓ | ✗ | ✓ | PII-protected queries |
+| 11 | 11-domain-plugin.yaml | ✗ | ✗ | ✓ | ✓ | Legal advice with disclaimers |
+| 12 | 12-keyword-embedding-plugin.yaml | ✓ | ✓ | ✗ | ✓ | Security queries with protection |
+| 13 | 13-keyword-domain-plugin.yaml | ✓ | ✗ | ✓ | ✓ | Medical queries with PII |
+| 14 | 14-domain-embedding-plugin.yaml | ✗ | ✓ | ✓ | ✓ | Financial advice with protection |
+| 15 | 15-keyword-embedding-domain-plugin.yaml | ✓ | ✓ | ✓ | ✓ | Enterprise compliance (full) |
+| 16 | 16-keyword-embedding-domain-no-plugin.yaml | ✓ | ✓ | ✓ | ✗ | Educational tutorials |
+
+## Test Scenarios Details
+
+### Signal Type Combinations (No Plugins)
+
+2. **02-keyword-only.yaml** - Only keyword signals
+   - Use case: FAQ detection, greeting responses
+   - Signals: urgent, greeting keywords
+
+3. **03-embedding-only.yaml** - Only embedding signals
+   - Use case: Customer support, technical issue detection
+   - Signals: customer_support, technical_issue embeddings
+
+4. **04-domain-only.yaml** - Only domain signals
+   - Use case: STEM queries, subject-specific routing
+   - Signals: math, physics, computer_science, chemistry domains
+
+5. **05-keyword-embedding.yaml** - Keyword + Embedding
+   - Use case: Urgent support requests with semantic matching
+   - Signals: urgent keywords + support_request embeddings
+
+6. **06-keyword-domain.yaml** - Keyword + Domain
+   - Use case: Academic homework assistance
+   - Signals: homework keywords + math/physics/chemistry domains
+
+7. **07-domain-embedding.yaml** - Domain + Embedding
+   - Use case: Research queries in specific domains
+   - Signals: research_question embeddings + biology/chemistry/physics domains
+
+8. **08-keyword-embedding-domain.yaml** - All three signal types
+   - Use case: Comprehensive technical support routing
+   - Signals: urgent keywords + technical_help embeddings + CS/engineering/math domains
+
+### Signal Type Combinations (With Plugins)
+
+9. **09-keyword-plugin.yaml** - Keyword + Plugins
+   - Use case: FAQ with aggressive caching
+   - Plugins: semantic-cache, header_mutation
+
+10. **10-embedding-plugin.yaml** - Embedding + Plugins
+    - Use case: PII-protected sensitive data handling
+    - Plugins: pii (redaction), jailbreak protection
+
+11. **11-domain-plugin.yaml** - Domain + Plugins
+    - Use case: Legal advice with disclaimers
+    - Plugins: system_prompt, semantic-cache
+
+12. **12-keyword-embedding-plugin.yaml** - Keyword + Embedding + Plugins
+    - Use case: Security queries with protection
+    - Plugins: jailbreak, system_prompt, header_mutation
+
+13. **13-keyword-domain-plugin.yaml** - Keyword + Domain + Plugins
+    - Use case: Medical queries with PII protection
+    - Plugins: pii (hash mode), system_prompt, semantic-cache
+
+14. **14-domain-embedding-plugin.yaml** - Domain + Embedding + Plugins
+    - Use case: Financial advice with comprehensive protection
+    - Plugins: pii, system_prompt, jailbreak, semantic-cache
+
+15. **15-keyword-embedding-domain-plugin.yaml** - Keyword + Embedding + Domain + Plugins
+    - Use case: Enterprise compliance and legal queries with full protection
+    - Signals: compliance/confidential keywords + business_analysis/legal_review embeddings + business/law/economics domains
+    - Plugins: pii (hash/mask modes), jailbreak, system_prompt, semantic-cache, header_mutation
+    - Multiple decisions with different plugin configurations
+
+16. **16-keyword-embedding-domain-no-plugin.yaml** - All signals, no plugins
+    - Use case: Educational tutorials across multiple domains
+    - Signals: tutorial keywords + learning_intent embeddings + CS/math/engineering domains
+    - Multiple decisions with different priorities
+
+## Plugin Types Used
+
+- **semantic-cache**: Cache responses for similar queries
+- **pii**: Detect and redact/mask/hash PII entities
+- **jailbreak**: Detect and block jailbreak attempts
+- **system_prompt**: Inject custom system prompts
+- **header_mutation**: Add custom headers to requests
+
+## Running Tests
+
+```bash
+cd src/semantic-router
+go test ./pkg/k8s -v -run TestConverterWithTestData
+```
+
+This will:
+
+1. Load `base-config.yaml` as the static configuration base
+2. Parse each input YAML file (IntelligentPool + IntelligentRoute)
+3. Convert CRDs to RouterConfig format
+4. Merge static base config with dynamic CRD-derived config
+5. Generate output YAML files in `testdata/output/`
+6. Validate that output can be unmarshaled correctly
+
+## Output Structure
+
+Each generated output file contains:
+
+- **Static parts** (from base-config.yaml):
+ - embedding_models, bert_model, classifier, prompt_guard
+ - semantic_cache, observability, api, tools
+ - reasoning_families, default_reasoning_effort
+
+- **Dynamic parts** (from CRDs):
+ - keyword_rules (from signals.keywords)
+ - embedding_rules (from signals.embeddings)
+ - categories (from signals.domains)
+ - decisions (from decisions)
+ - model_config (from IntelligentPool.models)
+ - default_model (from IntelligentPool.defaultModel)
diff --git a/src/semantic-router/pkg/k8s/testdata/base-config.yaml b/src/semantic-router/pkg/k8s/testdata/base-config.yaml
new file mode 100644
index 000000000..9b220c5ba
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/base-config.yaml
@@ -0,0 +1,126 @@
+# Base configuration shared across all test cases
+# This simulates the static parts of config.yaml that don't come from CRDs
+
+# Reasoning family configurations
+reasoning_families:
+ deepseek:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+ qwen3:
+ type: "chat_template_kwargs"
+ parameter: "enable_thinking"
+ gpt:
+ type: "reasoning_effort"
+ parameter: "reasoning_effort"
+
+# Global default reasoning effort level
+default_reasoning_effort: high
+
+# BERT model configuration
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+
+# Semantic cache configuration
+semantic_cache:
+ enabled: true
+ backend_type: "memory"
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: "fifo"
+ use_hnsw: true
+ hnsw_m: 16
+ hnsw_ef_construction: 200
+ embedding_model: "bert"
+
+# Tools configuration
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: "config/tools_db.json"
+ fallback_to_empty: true
+
+# Prompt guard configuration
+prompt_guard:
+ enabled: true
+ use_modernbert: true
+ model_id: "models/jailbreak_classifier_modernbert-base_model"
+ threshold: 0.7
+ use_cpu: true
+ jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
+
+# Classifier configuration
+classifier:
+ category_model:
+ model_id: "models/category_classifier_modernbert-base_model"
+ use_modernbert: true
+ threshold: 0.6
+ use_cpu: true
+ category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
+ pii_model:
+ model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
+ use_modernbert: true
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+
+# Router configuration
+router:
+ high_confidence_threshold: 0.99
+ low_latency_threshold_ms: 2000
+ lora_baseline_score: 0.8
+ traditional_baseline_score: 0.7
+ embedding_baseline_score: 0.75
+ success_confidence_threshold: 0.8
+ large_batch_threshold: 4
+ lora_default_execution_time_ms: 1345
+ traditional_default_execution_time_ms: 4567
+ default_confidence_threshold: 0.95
+ default_max_latency_ms: 5000
+ default_batch_size: 4
+ default_avg_execution_time_ms: 3000
+ lora_default_confidence: 0.99
+ traditional_default_confidence: 0.95
+ lora_default_success_rate: 0.98
+ traditional_default_success_rate: 0.95
+
+# Embedding models configuration
+embedding_models:
+ qwen3_model_path: "models/Qwen3-Embedding-0.6B"
+ gemma_model_path: "models/embeddinggemma-300m"
+ use_cpu: true
+
+# API configuration
+api:
+ batch_classification:
+ max_batch_size: 100
+ concurrency_threshold: 5
+ max_concurrency: 8
+ metrics:
+ enabled: true
+ detailed_goroutine_tracking: true
+ high_resolution_timing: false
+ sample_rate: 1.0
+ duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+ size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+
+# Observability configuration
+observability:
+ tracing:
+ enabled: false
+ provider: "opentelemetry"
+ exporter:
+ type: "otlp"
+ endpoint: "jaeger:4317"
+ insecure: true
+ sampling:
+ type: "always_on"
+ rate: 1.0
+ resource:
+ service_name: "vllm-semantic-router"
+ service_version: "v0.1.0"
+ deployment_environment: "development"
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/01-basic.yaml b/src/semantic-router/pkg/k8s/testdata/input/01-basic.yaml
new file mode 100644
index 000000000..9bd351c47
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/01-basic.yaml
@@ -0,0 +1,90 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: test-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-7b"
+ models:
+ - name: "qwen-2.5-7b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.0000005 # $0.5 per 1M input tokens
+ outputTokenPrice: 0.000001 # $1 per 1M output tokens
+ loras:
+ - name: "tech-support"
+ description: "Technical support specialist"
+
+ - name: "qwen-2.5-72b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M input tokens
+ outputTokenPrice: 0.000006 # $6 per 1M output tokens
+ loras:
+ - name: "advanced-reasoning"
+ description: "Advanced reasoning and problem solving"
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: test-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "urgent"
+ operator: "OR"
+ keywords: ["urgent", "emergency", "asap"]
+ caseSensitive: false
+ embeddings:
+ - name: "tech_support"
+ threshold: 0.75
+ candidates:
+ - "I need help with technical issues"
+ - "Can you help me troubleshoot this problem?"
+ - "Something is not working correctly"
+ aggregationMethod: "max"
+ domains:
+ - name: "computer_science"
+ description: "Computer science and programming"
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+
+ decisions:
+ - name: "urgent_tech"
+ priority: 100
+ description: "Urgent technical support requests - use large model with reasoning"
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "urgent"
+ - type: "embedding"
+ name: "tech_support"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ loraName: "advanced-reasoning"
+ useReasoning: true
+ reasoningEffort: "high"
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.9
+
+ - name: "general_tech"
+ priority: 50
+ description: "General technical queries - use small model for efficiency"
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "embedding"
+ name: "tech_support"
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "qwen-2.5-7b"
+ loraName: "tech-support"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/02-keyword-only.yaml b/src/semantic-router/pkg/k8s/testdata/input/02-keyword-only.yaml
new file mode 100644
index 000000000..d58b1234f
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/02-keyword-only.yaml
@@ -0,0 +1,70 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: keyword-pool
+ namespace: default
+spec:
+ defaultModel: "gemma-2-9b"
+ models:
+ - name: "gemma-2-9b"
+ pricing:
+ inputTokenPrice: 0.0000004 # $0.4 per 1M input tokens - fast for simple keyword matching
+ outputTokenPrice: 0.0000008 # $0.8 per 1M output tokens
+ loras:
+ - name: "greeting-handler"
+ description: "Optimized for greeting responses"
+
+ - name: "gemma-2-27b"
+ pricing:
+ inputTokenPrice: 0.000002 # $2 per 1M input tokens - better for complex urgent requests
+ outputTokenPrice: 0.000004 # $4 per 1M output tokens
+ loras:
+ - name: "urgent-specialist"
+ description: "Specialized in handling urgent requests"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: keyword-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "urgent"
+ operator: "OR"
+ keywords: ["urgent", "emergency", "critical", "asap"]
+ caseSensitive: false
+ - name: "greeting"
+ operator: "OR"
+ keywords: ["hello", "hi", "hey", "greetings"]
+ caseSensitive: false
+
+ decisions:
+ - name: "urgent_request"
+ description: "Handle urgent requests with larger model"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "urgent"
+ modelRefs:
+ - model: "gemma-2-27b"
+ loraName: "urgent-specialist"
+ useReasoning: false
+
+ - name: "greeting_response"
+ description: "Handle greetings with fast small model"
+ priority: 50
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "greeting"
+ modelRefs:
+ - model: "gemma-2-9b"
+ loraName: "greeting-handler"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/03-embedding-only.yaml b/src/semantic-router/pkg/k8s/testdata/input/03-embedding-only.yaml
new file mode 100644
index 000000000..fd24225b1
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/03-embedding-only.yaml
@@ -0,0 +1,79 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: embedding-pool
+ namespace: default
+spec:
+ defaultModel: "deepseek-v3"
+ models:
+ - name: "deepseek-v3"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000027 # $0.27 per 1M input tokens - efficient for semantic matching
+ outputTokenPrice: 0.0000011 # $1.1 per 1M output tokens
+ loras:
+ - name: "customer-support"
+ description: "Customer support specialist"
+
+ - name: "deepseek-r1"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000055 # $0.55 per 1M input tokens - better reasoning
+ outputTokenPrice: 0.0000022 # $2.2 per 1M output tokens
+ loras:
+ - name: "technical-expert"
+ description: "Technical problem solving expert"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: embedding-route
+ namespace: default
+spec:
+ signals:
+ embeddings:
+ - name: "customer_support"
+ threshold: 0.75
+ candidates:
+ - "I need help with my account"
+ - "Can you assist me with a problem?"
+ - "I have a question about my order"
+ aggregationMethod: "max"
+ - name: "technical_issue"
+ threshold: 0.80
+ candidates:
+ - "The system is not working properly"
+ - "I'm experiencing technical difficulties"
+ - "There's a bug in the application"
+ aggregationMethod: "mean"
+
+ decisions:
+ - name: "tech_troubleshoot"
+ description: "Technical troubleshooting - use reasoning model"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "technical_issue"
+ modelRefs:
+ - model: "deepseek-r1"
+ loraName: "technical-expert"
+ useReasoning: true
+ reasoningEffort: "high"
+
+ - name: "support_ticket"
+ description: "Customer support requests - use fast model"
+ priority: 80
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "customer_support"
+ modelRefs:
+ - model: "deepseek-v3"
+ loraName: "customer-support"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/04-domain-only.yaml b/src/semantic-router/pkg/k8s/testdata/input/04-domain-only.yaml
new file mode 100644
index 000000000..380e31ed1
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/04-domain-only.yaml
@@ -0,0 +1,76 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: domain-pool
+ namespace: default
+spec:
+ defaultModel: "mistral-7b"
+ models:
+ - name: "mistral-7b"
+ pricing:
+ inputTokenPrice: 0.0000007 # $0.7 per 1M - fast for simple STEM queries
+ outputTokenPrice: 0.0000014 # $1.4 per 1M
+ loras:
+ - name: "stem-tutor"
+ description: "STEM education tutor"
+
+ - name: "mistral-large"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - better for complex STEM problems
+ outputTokenPrice: 0.000009 # $9 per 1M
+ loras:
+ - name: "math-expert"
+ description: "Mathematics specialist"
+ - name: "science-expert"
+ description: "Science specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: domain-route
+ namespace: default
+spec:
+ signals:
+ domains:
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+ - name: "physics"
+ description: "Physics and physical sciences"
+ - name: "computer_science"
+ description: "Computer science and programming"
+ - name: "chemistry"
+ description: "Chemistry and chemical sciences"
+
+ decisions:
+ - name: "stem_query"
+ description: "Complex STEM domain queries - use large model"
+ priority: 100
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ - type: "domain"
+ name: "physics"
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "mistral-large"
+ loraName: "math-expert"
+ useReasoning: false
+
+ - name: "chemistry_query"
+ description: "Chemistry domain queries - use small model"
+ priority: 80
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: "mistral-7b"
+ loraName: "stem-tutor"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/05-keyword-embedding.yaml b/src/semantic-router/pkg/k8s/testdata/input/05-keyword-embedding.yaml
new file mode 100644
index 000000000..7eb126f1c
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/05-keyword-embedding.yaml
@@ -0,0 +1,82 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: hybrid-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-14b"
+ models:
+ - name: "qwen-2.5-14b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000001 # $1 per 1M - balanced performance
+ outputTokenPrice: 0.000002 # $2 per 1M
+ loras:
+ - name: "support-agent"
+ description: "Customer support agent"
+
+ - name: "qwen-2.5-72b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - for urgent/complex issues
+ outputTokenPrice: 0.000006 # $6 per 1M
+ loras:
+ - name: "emergency-specialist"
+ description: "Emergency support specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: hybrid-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "urgent"
+ operator: "OR"
+ keywords: ["urgent", "emergency", "critical"]
+ caseSensitive: false
+
+ embeddings:
+ - name: "support_request"
+ threshold: 0.70
+ candidates:
+ - "I need immediate assistance"
+ - "Please help me resolve this issue"
+ - "Can someone help me urgently?"
+ aggregationMethod: "max"
+
+ decisions:
+ - name: "urgent_support"
+ description: "Urgent support requests - use large model with reasoning"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "urgent"
+ - type: "embedding"
+ name: "support_request"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ loraName: "emergency-specialist"
+ useReasoning: true
+ reasoningEffort: "high"
+
+ - name: "general_support"
+ description: "General support requests - use small model"
+ priority: 50
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "keyword"
+ name: "urgent"
+ - type: "embedding"
+ name: "support_request"
+ modelRefs:
+ - model: "qwen-2.5-14b"
+ loraName: "support-agent"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/06-keyword-domain.yaml b/src/semantic-router/pkg/k8s/testdata/input/06-keyword-domain.yaml
new file mode 100644
index 000000000..49e205d17
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/06-keyword-domain.yaml
@@ -0,0 +1,81 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: keyword-domain-pool
+ namespace: default
+spec:
+ defaultModel: "deepseek-v3"
+ models:
+ - name: "deepseek-v3"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000027 # $0.27 per 1M - efficient for homework help
+ outputTokenPrice: 0.0000011 # $1.1 per 1M
+ loras:
+ - name: "homework-helper"
+ description: "Homework assistance specialist"
+
+ - name: "deepseek-r1"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000055 # $0.55 per 1M - better for complex academic problems
+ outputTokenPrice: 0.0000022 # $2.2 per 1M
+ loras:
+ - name: "academic-expert"
+ description: "Advanced academic problem solver"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: keyword-domain-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "homework"
+ operator: "OR"
+ keywords: ["homework", "assignment", "exercise", "problem set"]
+ caseSensitive: false
+
+ domains:
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+ - name: "physics"
+ description: "Physics and physical sciences"
+ - name: "chemistry"
+ description: "Chemistry and chemical sciences"
+
+ decisions:
+ - name: "math_homework"
+ description: "Math homework assistance - use reasoning model"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "homework"
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "deepseek-r1"
+ loraName: "academic-expert"
+ useReasoning: true
+ reasoningEffort: "medium"
+
+ - name: "science_homework"
+ description: "Science homework assistance - use fast model"
+ priority: 90
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "homework"
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: "deepseek-v3"
+ loraName: "homework-helper"
+ useReasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/07-domain-embedding.yaml b/src/semantic-router/pkg/k8s/testdata/input/07-domain-embedding.yaml
new file mode 100644
index 000000000..281f0c769
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/07-domain-embedding.yaml
@@ -0,0 +1,81 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: domain-embedding-pool
+ namespace: default
+spec:
+ defaultModel: "gpt-4o-mini"
+ models:
+ - name: "gpt-4o-mini"
+ pricing:
+ inputTokenPrice: 0.00000015 # $0.15 per 1M - fast for research queries
+ outputTokenPrice: 0.0000006 # $0.6 per 1M
+ loras:
+ - name: "research-assistant"
+ description: "Research assistant"
+
+ - name: "gpt-4o"
+ pricing:
+ inputTokenPrice: 0.0000025 # $2.5 per 1M - better for complex research
+ outputTokenPrice: 0.00001 # $10 per 1M
+ loras:
+ - name: "research-expert"
+ description: "Advanced research specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: domain-embedding-route
+ namespace: default
+spec:
+ signals:
+ embeddings:
+ - name: "research_question"
+ threshold: 0.78
+ candidates:
+ - "Can you explain the theory behind this concept?"
+ - "What are the latest research findings on this topic?"
+ - "I'm conducting research and need detailed information"
+ aggregationMethod: "max"
+
+ domains:
+ - name: "biology"
+ description: "Biology and life sciences"
+ - name: "chemistry"
+ description: "Chemistry and chemical sciences"
+ - name: "physics"
+ description: "Physics and physical sciences"
+
+ decisions:
+ - name: "biology_research"
+ description: "Biology research queries"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "research_question"
+ - type: "domain"
+ name: "biology"
+      modelRefs:
+        - model: "gpt-4o"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: true
+          reasoningEffort: "high"
+
+ - name: "general_science_research"
+ description: "General science research"
+ priority: 80
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "research_question"
+ - type: "domain"
+ name: "chemistry"
+      modelRefs:
+        - model: "gpt-4o"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: true
+          reasoningEffort: "medium"
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/08-keyword-embedding-domain.yaml b/src/semantic-router/pkg/k8s/testdata/input/08-keyword-embedding-domain.yaml
new file mode 100644
index 000000000..f7cdfbc21
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/08-keyword-embedding-domain.yaml
@@ -0,0 +1,91 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: full-signal-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-32b"
+ models:
+ - name: "qwen-2.5-32b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000002 # $2 per 1M - balanced for multi-signal routing
+ outputTokenPrice: 0.000004 # $4 per 1M
+ loras:
+ - name: "multi-domain-expert"
+ description: "Multi-domain specialist"
+
+ - name: "qwen-2.5-72b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - for complex multi-signal queries
+ outputTokenPrice: 0.000006 # $6 per 1M
+ loras:
+ - name: "advanced-multi-domain"
+ description: "Advanced multi-domain expert"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: full-signal-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "urgent"
+ operator: "OR"
+ keywords: ["urgent", "emergency", "critical"]
+ caseSensitive: false
+
+ embeddings:
+ - name: "technical_help"
+ threshold: 0.75
+ candidates:
+ - "I need technical assistance with this problem"
+ - "Can you help me debug this issue?"
+ - "Something is broken and needs fixing"
+ aggregationMethod: "max"
+
+ domains:
+ - name: "computer_science"
+ description: "Computer science and programming"
+ - name: "engineering"
+ description: "Engineering and technical problem-solving"
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+
+ decisions:
+ - name: "urgent_tech_cs"
+ description: "Urgent technical computer science issues"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "urgent"
+ - type: "embedding"
+ name: "technical_help"
+ - type: "domain"
+ name: "computer_science"
+      modelRefs:
+        - model: "qwen-2.5-72b"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: true
+          reasoningEffort: "high"
+
+ - name: "tech_engineering"
+ description: "Technical engineering queries"
+ priority: 80
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "technical_help"
+ - type: "domain"
+ name: "engineering"
+      modelRefs:
+        - model: "qwen-2.5-72b"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: true
+          reasoningEffort: "medium"
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/09-keyword-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/09-keyword-plugin.yaml
new file mode 100644
index 000000000..e07a896a1
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/09-keyword-plugin.yaml
@@ -0,0 +1,64 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: keyword-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "llama-3.3-70b"
+ models:
+ - name: "llama-3.3-70b"
+ pricing:
+ inputTokenPrice: 0.0000006 # $0.6 per 1M - with semantic cache
+ outputTokenPrice: 0.0000012 # $1.2 per 1M
+ loras:
+ - name: "faq-cached"
+ description: "FAQ with semantic caching"
+
+ - name: "llama-3.1-405b"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - for complex cached queries
+ outputTokenPrice: 0.000015 # $15 per 1M
+ loras:
+ - name: "expert-cached"
+ description: "Expert responses with caching"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: keyword-plugin-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "faq"
+ operator: "OR"
+ keywords: ["faq", "frequently asked", "common question"]
+ caseSensitive: false
+
+ decisions:
+ - name: "cached_faq"
+ description: "FAQ with semantic cache"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "faq"
+      modelRefs:
+        - model: "llama-3.3-70b"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: false
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: "header_mutation"
+ configuration:
+ add:
+ - name: "X-Cache-Strategy"
+ value: "aggressive"
+ - name: "X-Route-Type"
+ value: "keyword"
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/10-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/10-embedding-plugin.yaml
new file mode 100644
index 000000000..57c634ccc
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/10-embedding-plugin.yaml
@@ -0,0 +1,68 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: embedding-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "deepseek-v3"
+ models:
+ - name: "deepseek-v3"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000027 # $0.27 per 1M - with jailbreak protection
+ outputTokenPrice: 0.0000011 # $1.1 per 1M
+ loras:
+ - name: "secure-assistant"
+ description: "Security-aware assistant"
+
+ - name: "deepseek-r1"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000055 # $0.55 per 1M - advanced security
+ outputTokenPrice: 0.0000022 # $2.2 per 1M
+ loras:
+ - name: "security-expert"
+ description: "Advanced security specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: embedding-plugin-route
+ namespace: default
+spec:
+ signals:
+ embeddings:
+ - name: "sensitive_data"
+ threshold: 0.80
+ candidates:
+ - "I need to share my personal information"
+ - "Here is my credit card number"
+ - "My social security number is"
+ aggregationMethod: "max"
+
+ decisions:
+ - name: "pii_protected"
+ description: "PII-protected queries"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "sensitive_data"
+      modelRefs:
+        - model: "deepseek-r1"
+          # camelCase to match the CRD schema (see useReasoning in 01-basic.yaml)
+          useReasoning: true
+          reasoningEffort: "medium"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.8
+ pii_types_allowed: ["CREDIT_CARD", "SSN", "EMAIL"]
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.85
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/11-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/11-domain-plugin.yaml
new file mode 100644
index 000000000..49d9b5f7f
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/11-domain-plugin.yaml
@@ -0,0 +1,61 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: domain-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "gpt-4o-mini"
+ models:
+ - name: "gpt-4o-mini"
+ pricing:
+ inputTokenPrice: 0.00000015 # $0.15 per 1M - for simple legal queries
+ outputTokenPrice: 0.0000006 # $0.6 per 1M
+ loras:
+ - name: "legal-assistant"
+ description: "Legal assistant"
+
+ - name: "gpt-4o"
+ pricing:
+ inputTokenPrice: 0.0000025 # $2.5 per 1M - for complex legal analysis
+ outputTokenPrice: 0.00001 # $10 per 1M
+ loras:
+ - name: "legal-expert"
+ description: "Legal domain specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: domain-plugin-route
+ namespace: default
+spec:
+ signals:
+ domains:
+ - name: "law"
+ description: "Legal questions and law-related topics"
+ - name: "business"
+ description: "Business and management related queries"
+
+ decisions:
+ - name: "legal_advice"
+ description: "Legal domain with system prompt"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "gpt-4o"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a legal expert. Provide accurate legal information but remind users to consult a licensed attorney."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.90
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/12-keyword-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/12-keyword-embedding-plugin.yaml
new file mode 100644
index 000000000..aeb1b8631
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/12-keyword-embedding-plugin.yaml
@@ -0,0 +1,81 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: ke-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-32b"
+ models:
+ - name: "qwen-2.5-32b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000002 # $2 per 1M - with header mutation
+ outputTokenPrice: 0.000004 # $4 per 1M
+ loras:
+ - name: "api-router"
+ description: "API routing specialist"
+
+ - name: "qwen-2.5-72b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - advanced routing
+ outputTokenPrice: 0.000006 # $6 per 1M
+ loras:
+ - name: "advanced-router"
+ description: "Advanced API routing expert"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: ke-plugin-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "security"
+ operator: "OR"
+ keywords: ["security", "vulnerability", "exploit", "hack"]
+ caseSensitive: false
+
+ embeddings:
+ - name: "threat_detection"
+ threshold: 0.82
+ candidates:
+ - "I found a security vulnerability in the system"
+ - "There's a potential exploit that needs attention"
+ - "We need to patch this security issue"
+ aggregationMethod: "max"
+
+ decisions:
+ - name: "security_alert"
+ description: "Security-related queries with jailbreak protection"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "security"
+ - type: "embedding"
+ name: "threat_detection"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.90
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a security expert. Provide helpful security guidance while being cautious about potential misuse."
+ - type: "header_mutation"
+ configuration:
+ add:
+ - name: "X-Security-Level"
+ value: "high"
+ - name: "X-Route-Type"
+ value: "keyword-embedding"
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/13-keyword-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/13-keyword-domain-plugin.yaml
new file mode 100644
index 000000000..c3f231ab8
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/13-keyword-domain-plugin.yaml
@@ -0,0 +1,76 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: kd-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "deepseek-v3"
+ models:
+ - name: "deepseek-v3"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000027 # $0.27 per 1M - for general medical queries
+ outputTokenPrice: 0.0000011 # $1.1 per 1M
+ loras:
+ - name: "medical-assistant"
+ description: "Medical assistant"
+
+ - name: "deepseek-r1"
+ reasoningFamily: "deepseek"
+ pricing:
+ inputTokenPrice: 0.00000055 # $0.55 per 1M - for complex medical diagnosis
+ outputTokenPrice: 0.0000022 # $2.2 per 1M
+ loras:
+ - name: "medical-specialist"
+ description: "Medical domain expert"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: kd-plugin-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "diagnosis"
+ operator: "OR"
+ keywords: ["diagnosis", "symptoms", "treatment", "medical"]
+ caseSensitive: false
+
+ domains:
+ - name: "health"
+ description: "Health and medical information"
+ - name: "biology"
+ description: "Biology and life sciences"
+
+ decisions:
+ - name: "medical_query"
+ description: "Medical queries with PII protection"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "diagnosis"
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: "deepseek-r1"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.9
+ pii_types_allowed: ["PERSON", "DATE_OF_BIRTH", "MEDICAL_RECORD"]
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a medical information assistant. Provide general health information but always advise users to consult healthcare professionals."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.88
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/14-domain-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/14-domain-embedding-plugin.yaml
new file mode 100644
index 000000000..1adc09d4f
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/14-domain-embedding-plugin.yaml
@@ -0,0 +1,81 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: de-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "gpt-4o-mini"
+ models:
+ - name: "gpt-4o-mini"
+ pricing:
+ inputTokenPrice: 0.00000015 # $0.15 per 1M - for simple financial queries
+ outputTokenPrice: 0.0000006 # $0.6 per 1M
+ loras:
+ - name: "finance-assistant"
+ description: "Financial assistant"
+
+ - name: "gpt-4o"
+ pricing:
+ inputTokenPrice: 0.0000025 # $2.5 per 1M - for complex financial analysis
+ outputTokenPrice: 0.00001 # $10 per 1M
+ loras:
+ - name: "financial-advisor"
+ description: "Financial domain specialist"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: de-plugin-route
+ namespace: default
+spec:
+ signals:
+ embeddings:
+ - name: "financial_advice"
+ threshold: 0.78
+ candidates:
+ - "I need advice on investment strategies"
+ - "Can you help me with financial planning?"
+ - "What should I do with my retirement savings?"
+ aggregationMethod: "max"
+
+ domains:
+ - name: "economics"
+ description: "Economics and financial topics"
+ - name: "business"
+ description: "Business and management"
+
+ decisions:
+ - name: "investment_advice"
+ description: "Financial advice with comprehensive protection"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "embedding"
+ name: "financial_advice"
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "gpt-4o"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.85
+ pii_types_allowed: ["CREDIT_CARD", "BANK_ACCOUNT", "SSN"]
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a financial information assistant. Provide general financial education but remind users this is not professional financial advice."
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.85
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/15-keyword-embedding-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/15-keyword-embedding-domain-plugin.yaml
new file mode 100644
index 000000000..d7e10596d
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/15-keyword-embedding-domain-plugin.yaml
@@ -0,0 +1,164 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: ked-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-32b"
+ models:
+ - name: "qwen-2.5-32b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000002 # $2 per 1M - for standard enterprise queries
+ outputTokenPrice: 0.000004 # $4 per 1M
+ loras:
+ - name: "enterprise-assistant"
+ description: "Enterprise assistant"
+
+ - name: "qwen-2.5-72b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000003 # $3 per 1M - for complex enterprise scenarios
+ outputTokenPrice: 0.000006 # $6 per 1M
+ loras:
+ - name: "enterprise-specialist"
+ description: "Enterprise domain specialist"
+ - name: "compliance-expert"
+ description: "Compliance and security expert"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: ked-plugin-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "compliance"
+ operator: "OR"
+ keywords: ["compliance", "regulation", "audit", "policy"]
+ caseSensitive: false
+ - name: "confidential"
+ operator: "OR"
+ keywords: ["confidential", "sensitive", "private", "restricted"]
+ caseSensitive: false
+
+ embeddings:
+ - name: "business_analysis"
+ threshold: 0.76
+ candidates:
+ - "I need to analyze business metrics and performance"
+ - "Can you help me with strategic business planning?"
+ - "We need insights on market trends and competition"
+ aggregationMethod: "max"
+ - name: "legal_review"
+ threshold: 0.80
+ candidates:
+ - "This document needs legal review and compliance check"
+ - "We need to ensure regulatory compliance"
+ - "Can you review this for legal implications?"
+ aggregationMethod: "mean"
+
+ domains:
+ - name: "business"
+ description: "Business and management"
+ - name: "law"
+ description: "Legal questions and law-related topics"
+ - name: "economics"
+ description: "Economics and financial topics"
+
+ decisions:
+ - name: "compliance_legal"
+ description: "Compliance and legal queries with full protection"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "compliance"
+ - type: "embedding"
+ name: "legal_review"
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ use_reasoning: true
+ reasoning_effort: "high"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.9
+ pii_types_allowed: ["PERSON", "ORGANIZATION", "EMAIL", "PHONE_NUMBER"]
+ - type: "jailbreak"
+ configuration:
+ enabled: true
+ threshold: 0.88
+ - type: "system_prompt"
+ configuration:
+ system_prompt: "You are a legal compliance assistant. Provide accurate information about regulations and compliance requirements. Always remind users to consult legal professionals for specific advice."
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.93
+ - type: "header_mutation"
+ configuration:
+ add:
+ - name: "X-Compliance-Level"
+ value: "high"
+ - name: "X-Audit-Required"
+ value: "true"
+
+ - name: "confidential_business"
+ description: "Confidential business analysis"
+ priority: 90
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "confidential"
+ - type: "embedding"
+ name: "business_analysis"
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ use_reasoning: true
+ reasoning_effort: "medium"
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
+ threshold: 0.85
+ pii_types_allowed: ["PERSON", "ORGANIZATION", "FINANCIAL_DATA"]
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.90
+ - type: "header_mutation"
+ configuration:
+ add:
+ - name: "X-Confidentiality"
+ value: "high"
+
+ - name: "general_business"
+ description: "General business and economics queries"
+ priority: 50
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "embedding"
+ name: "business_analysis"
+ - type: "domain"
+ name: "economics"
+ modelRefs:
+ - model: "qwen-2.5-72b"
+ use_reasoning: false
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.85
+
diff --git a/src/semantic-router/pkg/k8s/testdata/input/16-keyword-embedding-domain-no-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/input/16-keyword-embedding-domain-no-plugin.yaml
new file mode 100644
index 000000000..4183a361f
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/input/16-keyword-embedding-domain-no-plugin.yaml
@@ -0,0 +1,107 @@
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentPool
+metadata:
+ name: ked-no-plugin-pool
+ namespace: default
+spec:
+ defaultModel: "qwen-2.5-14b"
+ models:
+ - name: "qwen-2.5-14b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000001 # $1 per 1M - for general queries
+ outputTokenPrice: 0.000002 # $2 per 1M
+ loras:
+ - name: "general-assistant"
+ description: "General purpose assistant"
+
+ - name: "qwen-2.5-32b"
+ reasoningFamily: "qwen3"
+ pricing:
+ inputTokenPrice: 0.000002 # $2 per 1M - for complex general queries
+ outputTokenPrice: 0.000004 # $4 per 1M
+ loras:
+ - name: "advanced-assistant"
+ description: "Advanced general purpose assistant"
+
+---
+apiVersion: vllm.ai/v1alpha1
+kind: IntelligentRoute
+metadata:
+ name: ked-no-plugin-route
+ namespace: default
+spec:
+ signals:
+ keywords:
+ - name: "tutorial"
+ operator: "OR"
+ keywords: ["tutorial", "guide", "how-to", "learn"]
+ caseSensitive: false
+
+ embeddings:
+ - name: "learning_intent"
+ threshold: 0.72
+ candidates:
+ - "I want to learn about this topic"
+ - "Can you teach me how to do this?"
+ - "I'm trying to understand this concept"
+ aggregationMethod: "max"
+
+ domains:
+ - name: "computer_science"
+ description: "Computer science and programming"
+ - name: "math"
+ description: "Mathematics and quantitative reasoning"
+ - name: "engineering"
+ description: "Engineering and technical problem-solving"
+
+ decisions:
+ - name: "cs_tutorial"
+ description: "Computer science tutorials"
+ priority: 100
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "tutorial"
+ - type: "embedding"
+ name: "learning_intent"
+ - type: "domain"
+ name: "computer_science"
+ modelRefs:
+ - model: "qwen-2.5-32b"
+ use_reasoning: true
+ reasoning_effort: "medium"
+
+ - name: "math_tutorial"
+ description: "Math tutorials"
+ priority: 90
+ signals:
+ operator: "AND"
+ conditions:
+ - type: "keyword"
+ name: "tutorial"
+ - type: "embedding"
+ name: "learning_intent"
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: "qwen-2.5-32b"
+ use_reasoning: true
+ reasoning_effort: "high"
+
+ - name: "general_learning"
+ description: "General learning queries"
+ priority: 50
+ signals:
+ operator: "OR"
+ conditions:
+ - type: "keyword"
+ name: "tutorial"
+ - type: "embedding"
+ name: "learning_intent"
+ modelRefs:
+ - model: "qwen-2.5-32b"
+ use_reasoning: false
+
diff --git a/src/semantic-router/pkg/k8s/testdata/output/01-basic.yaml b/src/semantic-router/pkg/k8s/testdata/output/01-basic.yaml
new file mode 100644
index 000000000..dabcf8df0
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/01-basic.yaml
@@ -0,0 +1,178 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: urgent
+ operator: OR
+ keywords:
+ - urgent
+ - emergency
+ - asap
+ case_sensitive: false
+embedding_rules:
+ - name: tech_support
+ threshold: 0.75
+ candidates:
+ - I need help with technical issues
+ - Can you help me troubleshoot this problem?
+ - Something is not working correctly
+ aggregation_method: max
+categories:
+ - name: computer_science
+ description: Computer science and programming
+ mmlu_categories:
+ - computer_science
+ - name: math
+ description: Mathematics and quantitative reasoning
+ mmlu_categories:
+ - math
+decisions:
+ - name: urgent_tech
+ description: Urgent technical support requests - use large model with reasoning
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: urgent
+ - type: embedding
+ name: tech_support
+ modelRefs:
+ - model: qwen-2.5-72b
+ lora_name: advanced-reasoning
+ use_reasoning: true
+ reasoning_effort: high
+ plugins:
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.9
+ - name: general_tech
+ description: General technical queries - use small model for efficiency
+ priority: 50
+ rules:
+ operator: OR
+ conditions:
+ - type: embedding
+ name: tech_support
+ - type: domain
+ name: computer_science
+ modelRefs:
+ - model: qwen-2.5-7b
+ lora_name: tech-support
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-7b:
+ pricing:
+ prompt_per_1m: 0.5
+ completion_per_1m: 1
+ reasoning_family: qwen3
+ loras:
+ - name: tech-support
+ description: Technical support specialist
+ qwen-2.5-72b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 6
+ reasoning_family: qwen3
+ loras:
+ - name: advanced-reasoning
+ description: Advanced reasoning and problem solving
+default_model: qwen-2.5-7b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/02-keyword-only.yaml b/src/semantic-router/pkg/k8s/testdata/output/02-keyword-only.yaml
new file mode 100644
index 000000000..3543494b9
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/02-keyword-only.yaml
@@ -0,0 +1,159 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: urgent
+ operator: OR
+ keywords:
+ - urgent
+ - emergency
+ - critical
+ - asap
+ case_sensitive: false
+ - name: greeting
+ operator: OR
+ keywords:
+ - hello
+ - hi
+ - hey
+ - greetings
+ case_sensitive: false
+categories: []
+decisions:
+ - name: urgent_request
+ description: Handle urgent requests with larger model
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: urgent
+ modelRefs:
+ - model: gemma-2-27b
+ lora_name: urgent-specialist
+ use_reasoning: false
+ - name: greeting_response
+ description: Handle greetings with fast small model
+ priority: 50
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: greeting
+ modelRefs:
+ - model: gemma-2-9b
+ lora_name: greeting-handler
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ gemma-2-9b:
+ pricing:
+ prompt_per_1m: 0.39999999999999997
+ completion_per_1m: 0.7999999999999999
+ loras:
+ - name: greeting-handler
+ description: Optimized for greeting responses
+ gemma-2-27b:
+ pricing:
+ prompt_per_1m: 2
+ completion_per_1m: 4
+ loras:
+ - name: urgent-specialist
+ description: Specialized in handling urgent requests
+default_model: gemma-2-9b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/03-embedding-only.yaml b/src/semantic-router/pkg/k8s/testdata/output/03-embedding-only.yaml
new file mode 100644
index 000000000..c07332074
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/03-embedding-only.yaml
@@ -0,0 +1,160 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+embedding_rules:
+ - name: customer_support
+ threshold: 0.75
+ candidates:
+ - I need help with my account
+ - Can you assist me with a problem?
+ - I have a question about my order
+ aggregation_method: max
+ - name: technical_issue
+ threshold: 0.8
+ candidates:
+ - The system is not working properly
+ - I'm experiencing technical difficulties
+ - There's a bug in the application
+ aggregation_method: mean
+categories: []
+decisions:
+ - name: tech_troubleshoot
+ description: Technical troubleshooting - use reasoning model
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: technical_issue
+ modelRefs:
+ - model: deepseek-r1
+ lora_name: technical-expert
+ use_reasoning: true
+ reasoning_effort: high
+ - name: support_ticket
+ description: Customer support requests - use fast model
+ priority: 80
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: customer_support
+ modelRefs:
+ - model: deepseek-v3
+ lora_name: customer-support
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ deepseek-r1:
+ pricing:
+ prompt_per_1m: 0.55
+ completion_per_1m: 2.2
+ reasoning_family: deepseek
+ loras:
+ - name: technical-expert
+ description: Technical problem solving expert
+ deepseek-v3:
+ pricing:
+ prompt_per_1m: 0.27
+ completion_per_1m: 1.1
+ reasoning_family: deepseek
+ loras:
+ - name: customer-support
+ description: Customer support specialist
+default_model: deepseek-v3
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/04-domain-only.yaml b/src/semantic-router/pkg/k8s/testdata/output/04-domain-only.yaml
new file mode 100644
index 000000000..574ddd642
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/04-domain-only.yaml
@@ -0,0 +1,164 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+categories:
+ - name: math
+ description: Mathematics and quantitative reasoning
+ mmlu_categories:
+ - math
+ - name: physics
+ description: Physics and physical sciences
+ mmlu_categories:
+ - physics
+ - name: computer_science
+ description: Computer science and programming
+ mmlu_categories:
+ - computer_science
+ - name: chemistry
+ description: Chemistry and chemical sciences
+ mmlu_categories:
+ - chemistry
+decisions:
+ - name: stem_query
+ description: Complex STEM domain queries - use large model
+ priority: 100
+ rules:
+ operator: OR
+ conditions:
+ - type: domain
+ name: math
+ - type: domain
+ name: physics
+ - type: domain
+ name: computer_science
+ modelRefs:
+ - model: mistral-large
+ lora_name: math-expert
+ use_reasoning: false
+ - name: chemistry_query
+ description: Chemistry domain queries - use small model
+ priority: 80
+ rules:
+ operator: AND
+ conditions:
+ - type: domain
+ name: chemistry
+ modelRefs:
+ - model: mistral-7b
+ lora_name: stem-tutor
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ mistral-7b:
+ pricing:
+ prompt_per_1m: 0.7
+ completion_per_1m: 1.4
+ loras:
+ - name: stem-tutor
+ description: STEM education tutor
+ mistral-large:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 9
+ loras:
+ - name: math-expert
+ description: Mathematics specialist
+ - name: science-expert
+ description: Science specialist
+default_model: mistral-7b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/05-keyword-embedding.yaml b/src/semantic-router/pkg/k8s/testdata/output/05-keyword-embedding.yaml
new file mode 100644
index 000000000..262173273
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/05-keyword-embedding.yaml
@@ -0,0 +1,165 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: urgent
+ operator: OR
+ keywords:
+ - urgent
+ - emergency
+ - critical
+ case_sensitive: false
+embedding_rules:
+ - name: support_request
+ threshold: 0.7
+ candidates:
+ - I need immediate assistance
+ - Please help me resolve this issue
+ - Can someone help me urgently?
+ aggregation_method: max
+categories: []
+decisions:
+ - name: urgent_support
+ description: Urgent support requests - use large model with reasoning
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: urgent
+ - type: embedding
+ name: support_request
+ modelRefs:
+ - model: qwen-2.5-72b
+ lora_name: emergency-specialist
+ use_reasoning: true
+ reasoning_effort: high
+ - name: general_support
+ description: General support requests - use small model
+ priority: 50
+ rules:
+ operator: OR
+ conditions:
+ - type: keyword
+ name: urgent
+ - type: embedding
+ name: support_request
+ modelRefs:
+ - model: qwen-2.5-14b
+ lora_name: support-agent
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-14b:
+ pricing:
+ prompt_per_1m: 1
+ completion_per_1m: 2
+ reasoning_family: qwen3
+ loras:
+ - name: support-agent
+ description: Customer support agent
+ qwen-2.5-72b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 6
+ reasoning_family: qwen3
+ loras:
+ - name: emergency-specialist
+ description: Emergency support specialist
+default_model: qwen-2.5-14b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/06-keyword-domain.yaml b/src/semantic-router/pkg/k8s/testdata/output/06-keyword-domain.yaml
new file mode 100644
index 000000000..ea9846fb3
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/06-keyword-domain.yaml
@@ -0,0 +1,170 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: homework
+ operator: OR
+ keywords:
+ - homework
+ - assignment
+ - exercise
+ - problem set
+ case_sensitive: false
+categories:
+ - name: math
+ description: Mathematics and quantitative reasoning
+ mmlu_categories:
+ - math
+ - name: physics
+ description: Physics and physical sciences
+ mmlu_categories:
+ - physics
+ - name: chemistry
+ description: Chemistry and chemical sciences
+ mmlu_categories:
+ - chemistry
+decisions:
+ - name: math_homework
+ description: Math homework assistance - use reasoning model
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: homework
+ - type: domain
+ name: math
+ modelRefs:
+ - model: deepseek-r1
+ lora_name: academic-expert
+ use_reasoning: true
+ reasoning_effort: medium
+ - name: science_homework
+ description: Science homework assistance - use fast model
+ priority: 90
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: homework
+ - type: domain
+ name: physics
+ modelRefs:
+ - model: deepseek-v3
+ lora_name: homework-helper
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ deepseek-r1:
+ pricing:
+ prompt_per_1m: 0.55
+ completion_per_1m: 2.2
+ reasoning_family: deepseek
+ loras:
+ - name: academic-expert
+ description: Advanced academic problem solver
+ deepseek-v3:
+ pricing:
+ prompt_per_1m: 0.27
+ completion_per_1m: 1.1
+ reasoning_family: deepseek
+ loras:
+ - name: homework-helper
+ description: Homework assistance specialist
+default_model: deepseek-v3
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/07-domain-embedding.yaml b/src/semantic-router/pkg/k8s/testdata/output/07-domain-embedding.yaml
new file mode 100644
index 000000000..82bd9c44b
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/07-domain-embedding.yaml
@@ -0,0 +1,164 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+embedding_rules:
+ - name: research_question
+ threshold: 0.78
+ candidates:
+ - Can you explain the theory behind this concept?
+ - What are the latest research findings on this topic?
+ - I'm conducting research and need detailed information
+ aggregation_method: max
+categories:
+ - name: biology
+ description: Biology and life sciences
+ mmlu_categories:
+ - biology
+ - name: chemistry
+ description: Chemistry and chemical sciences
+ mmlu_categories:
+ - chemistry
+ - name: physics
+ description: Physics and physical sciences
+ mmlu_categories:
+ - physics
+decisions:
+ - name: biology_research
+ description: Biology research queries
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: research_question
+ - type: domain
+ name: biology
+ modelRefs:
+ - model: gpt-4o
+ use_reasoning: false
+ - name: general_science_research
+ description: General science research
+ priority: 80
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: research_question
+ - type: domain
+ name: chemistry
+ modelRefs:
+ - model: gpt-4o
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ gpt-4o:
+ pricing:
+ prompt_per_1m: 2.5
+ completion_per_1m: 10
+ loras:
+ - name: research-expert
+ description: Advanced research specialist
+ gpt-4o-mini:
+ pricing:
+ prompt_per_1m: 0.15
+ completion_per_1m: 0.6
+ loras:
+ - name: research-assistant
+ description: Research assistant
+default_model: gpt-4o-mini
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/08-keyword-embedding-domain.yaml b/src/semantic-router/pkg/k8s/testdata/output/08-keyword-embedding-domain.yaml
new file mode 100644
index 000000000..7ea77a6ef
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/08-keyword-embedding-domain.yaml
@@ -0,0 +1,176 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: urgent
+ operator: OR
+ keywords:
+ - urgent
+ - emergency
+ - critical
+ case_sensitive: false
+embedding_rules:
+ - name: technical_help
+ threshold: 0.75
+ candidates:
+ - I need technical assistance with this problem
+ - Can you help me debug this issue?
+ - Something is broken and needs fixing
+ aggregation_method: max
+categories:
+ - name: computer_science
+ description: Computer science and programming
+ mmlu_categories:
+ - computer_science
+ - name: engineering
+ description: Engineering and technical problem-solving
+ mmlu_categories:
+ - engineering
+ - name: math
+ description: Mathematics and quantitative reasoning
+ mmlu_categories:
+ - math
+decisions:
+ - name: urgent_tech_cs
+ description: Urgent technical computer science issues
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: urgent
+ - type: embedding
+ name: technical_help
+ - type: domain
+ name: computer_science
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+ - name: tech_engineering
+ description: Technical engineering queries
+ priority: 80
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: technical_help
+ - type: domain
+ name: engineering
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-32b:
+ pricing:
+ prompt_per_1m: 2
+ completion_per_1m: 4
+ reasoning_family: qwen3
+ loras:
+ - name: multi-domain-expert
+ description: Multi-domain specialist
+ qwen-2.5-72b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 6
+ reasoning_family: qwen3
+ loras:
+ - name: advanced-multi-domain
+ description: Advanced multi-domain expert
+default_model: qwen-2.5-32b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/09-keyword-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/09-keyword-plugin.yaml
new file mode 100644
index 000000000..f75fe4941
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/09-keyword-plugin.yaml
@@ -0,0 +1,149 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: faq
+ operator: OR
+ keywords:
+ - faq
+ - frequently asked
+ - common question
+ case_sensitive: false
+categories: []
+decisions:
+ - name: cached_faq
+ description: FAQ with semantic cache
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: faq
+ modelRefs:
+ - model: llama-3.3-70b
+ use_reasoning: false
+ plugins:
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+ - type: header_mutation
+ configuration:
+ add:
+ - name: X-Cache-Strategy
+ value: aggressive
+ - name: X-Route-Type
+ value: keyword
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ llama-3.1-405b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 15
+ loras:
+ - name: expert-cached
+ description: Expert responses with caching
+ llama-3.3-70b:
+ pricing:
+ prompt_per_1m: 0.6
+ completion_per_1m: 1.2
+ loras:
+ - name: faq-cached
+ description: FAQ with semantic caching
+default_model: llama-3.3-70b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/10-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/10-embedding-plugin.yaml
new file mode 100644
index 000000000..667b7ffe1
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/10-embedding-plugin.yaml
@@ -0,0 +1,152 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+embedding_rules:
+ - name: sensitive_data
+ threshold: 0.8
+ candidates:
+ - I need to share my personal information
+ - Here is my credit card number
+ - My social security number is
+ aggregation_method: max
+categories: []
+decisions:
+ - name: pii_protected
+ description: PII-protected queries
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: sensitive_data
+ modelRefs:
+ - model: deepseek-r1
+ use_reasoning: false
+ plugins:
+ - type: pii
+ configuration:
+ enabled: true
+ pii_types_allowed:
+ - CREDIT_CARD
+ - SSN
+ - EMAIL
+ threshold: 0.8
+ - type: jailbreak
+ configuration:
+ enabled: true
+ threshold: 0.85
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ deepseek-r1:
+ pricing:
+ prompt_per_1m: 0.55
+ completion_per_1m: 2.2
+ reasoning_family: deepseek
+ loras:
+ - name: security-expert
+ description: Advanced security specialist
+ deepseek-v3:
+ pricing:
+ prompt_per_1m: 0.27
+ completion_per_1m: 1.1
+ reasoning_family: deepseek
+ loras:
+ - name: secure-assistant
+ description: Security-aware assistant
+default_model: deepseek-v3
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/11-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/11-domain-plugin.yaml
new file mode 100644
index 000000000..2d71964bc
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/11-domain-plugin.yaml
@@ -0,0 +1,145 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+categories:
+ - name: law
+ description: Legal questions and law-related topics
+ mmlu_categories:
+ - law
+ - name: business
+ description: Business and management related queries
+ mmlu_categories:
+ - business
+decisions:
+ - name: legal_advice
+ description: Legal domain with system prompt
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: domain
+ name: law
+ modelRefs:
+ - model: gpt-4o
+ use_reasoning: false
+ plugins:
+ - type: system_prompt
+ configuration:
+ system_prompt: You are a legal expert. Provide accurate legal information but remind users to consult a licensed attorney.
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.9
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ gpt-4o:
+ pricing:
+ prompt_per_1m: 2.5
+ completion_per_1m: 10
+ loras:
+ - name: legal-expert
+ description: Legal domain specialist
+ gpt-4o-mini:
+ pricing:
+ prompt_per_1m: 0.15
+ completion_per_1m: 0.6
+ loras:
+ - name: legal-assistant
+ description: Legal assistant
+default_model: gpt-4o-mini
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/12-keyword-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/12-keyword-embedding-plugin.yaml
new file mode 100644
index 000000000..98712c838
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/12-keyword-embedding-plugin.yaml
@@ -0,0 +1,165 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: security
+ operator: OR
+ keywords:
+ - security
+ - vulnerability
+ - exploit
+ - hack
+ case_sensitive: false
+embedding_rules:
+ - name: threat_detection
+ threshold: 0.82
+ candidates:
+ - I found a security vulnerability in the system
+ - There's a potential exploit that needs attention
+ - We need to patch this security issue
+ aggregation_method: max
+categories: []
+decisions:
+ - name: security_alert
+ description: Security-related queries with jailbreak protection
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: security
+ - type: embedding
+ name: threat_detection
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+ plugins:
+ - type: jailbreak
+ configuration:
+ enabled: true
+ threshold: 0.9
+ - type: system_prompt
+ configuration:
+ system_prompt: You are a security expert. Provide helpful security guidance while being cautious about potential misuse.
+ - type: header_mutation
+ configuration:
+ add:
+ - name: X-Security-Level
+ value: high
+ - name: X-Route-Type
+ value: keyword-embedding
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-32b:
+ pricing:
+ prompt_per_1m: 2
+ completion_per_1m: 4
+ reasoning_family: qwen3
+ loras:
+ - name: api-router
+ description: API routing specialist
+ qwen-2.5-72b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 6
+ reasoning_family: qwen3
+ loras:
+ - name: advanced-router
+ description: Advanced API routing expert
+default_model: qwen-2.5-32b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/13-keyword-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/13-keyword-domain-plugin.yaml
new file mode 100644
index 000000000..de126f08b
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/13-keyword-domain-plugin.yaml
@@ -0,0 +1,166 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: diagnosis
+ operator: OR
+ keywords:
+ - diagnosis
+ - symptoms
+ - treatment
+ - medical
+ case_sensitive: false
+categories:
+ - name: health
+ description: Health and medical information
+ mmlu_categories:
+ - health
+ - name: biology
+ description: Biology and life sciences
+ mmlu_categories:
+ - biology
+decisions:
+ - name: medical_query
+ description: Medical queries with PII protection
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: diagnosis
+ - type: domain
+ name: health
+ modelRefs:
+ - model: deepseek-r1
+ use_reasoning: false
+ plugins:
+ - type: pii
+ configuration:
+ enabled: true
+ pii_types_allowed:
+ - PERSON
+ - DATE_OF_BIRTH
+ - MEDICAL_RECORD
+ threshold: 0.9
+ - type: system_prompt
+ configuration:
+ system_prompt: You are a medical information assistant. Provide general health information but always advise users to consult healthcare professionals.
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.88
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ deepseek-r1:
+ pricing:
+ prompt_per_1m: 0.55
+ completion_per_1m: 2.2
+ reasoning_family: deepseek
+ loras:
+ - name: medical-specialist
+ description: Medical domain expert
+ deepseek-v3:
+ pricing:
+ prompt_per_1m: 0.27
+ completion_per_1m: 1.1
+ reasoning_family: deepseek
+ loras:
+ - name: medical-assistant
+ description: Medical assistant
+default_model: deepseek-v3
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/14-domain-embedding-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/14-domain-embedding-plugin.yaml
new file mode 100644
index 000000000..fd0e4ea7a
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/14-domain-embedding-plugin.yaml
@@ -0,0 +1,167 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+embedding_rules:
+ - name: financial_advice
+ threshold: 0.78
+ candidates:
+ - I need advice on investment strategies
+ - Can you help me with financial planning?
+ - What should I do with my retirement savings?
+ aggregation_method: max
+categories:
+ - name: economics
+ description: Economics and financial topics
+ mmlu_categories:
+ - economics
+ - name: business
+ description: Business and management
+ mmlu_categories:
+ - business
+decisions:
+ - name: investment_advice
+ description: Financial advice with comprehensive protection
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: embedding
+ name: financial_advice
+ - type: domain
+ name: economics
+ modelRefs:
+ - model: gpt-4o
+ use_reasoning: false
+ plugins:
+ - type: pii
+ configuration:
+ enabled: true
+ pii_types_allowed:
+ - CREDIT_CARD
+ - BANK_ACCOUNT
+ - SSN
+ threshold: 0.85
+ - type: system_prompt
+ configuration:
+ system_prompt: You are a financial information assistant. Provide general financial education but remind users this is not professional financial advice.
+ - type: jailbreak
+ configuration:
+ enabled: true
+ threshold: 0.85
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.92
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ gpt-4o:
+ pricing:
+ prompt_per_1m: 2.5
+ completion_per_1m: 10
+ loras:
+ - name: financial-advisor
+ description: Financial domain specialist
+ gpt-4o-mini:
+ pricing:
+ prompt_per_1m: 0.15
+ completion_per_1m: 0.6
+ loras:
+ - name: finance-assistant
+ description: Financial assistant
+default_model: gpt-4o-mini
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/15-keyword-embedding-domain-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/15-keyword-embedding-domain-plugin.yaml
new file mode 100644
index 000000000..965ce783e
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/15-keyword-embedding-domain-plugin.yaml
@@ -0,0 +1,260 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: compliance
+ operator: OR
+ keywords:
+ - compliance
+ - regulation
+ - audit
+ - policy
+ case_sensitive: false
+ - name: confidential
+ operator: OR
+ keywords:
+ - confidential
+ - sensitive
+ - private
+ - restricted
+ case_sensitive: false
+embedding_rules:
+ - name: business_analysis
+ threshold: 0.76
+ candidates:
+ - I need to analyze business metrics and performance
+ - Can you help me with strategic business planning?
+ - We need insights on market trends and competition
+ aggregation_method: max
+ - name: legal_review
+ threshold: 0.8
+ candidates:
+ - This document needs legal review and compliance check
+ - We need to ensure regulatory compliance
+ - Can you review this for legal implications?
+ aggregation_method: mean
+categories:
+ - name: business
+ description: Business and management
+ mmlu_categories:
+ - business
+ - name: law
+ description: Legal questions and law-related topics
+ mmlu_categories:
+ - law
+ - name: economics
+ description: Economics and financial topics
+ mmlu_categories:
+ - economics
+decisions:
+ - name: compliance_legal
+ description: Compliance and legal queries with full protection
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: compliance
+ - type: embedding
+ name: legal_review
+ - type: domain
+ name: law
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+ plugins:
+ - type: pii
+ configuration:
+ enabled: true
+ pii_types_allowed:
+ - PERSON
+ - ORGANIZATION
+ - EMAIL
+ - PHONE_NUMBER
+ threshold: 0.9
+ - type: jailbreak
+ configuration:
+ enabled: true
+ threshold: 0.88
+ - type: system_prompt
+ configuration:
+ system_prompt: You are a legal compliance assistant. Provide accurate information about regulations and compliance requirements. Always remind users to consult legal professionals for specific advice.
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.93
+ - type: header_mutation
+ configuration:
+ add:
+ - name: X-Compliance-Level
+ value: high
+ - name: X-Audit-Required
+ value: "true"
+ - name: confidential_business
+ description: Confidential business analysis
+ priority: 90
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: confidential
+ - type: embedding
+ name: business_analysis
+ - type: domain
+ name: business
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+ plugins:
+ - type: pii
+ configuration:
+ enabled: true
+ pii_types_allowed:
+ - PERSON
+ - ORGANIZATION
+ - FINANCIAL_DATA
+ threshold: 0.85
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.9
+ - type: header_mutation
+ configuration:
+ add:
+ - name: X-Confidentiality
+ value: high
+ - name: general_business
+ description: General business and economics queries
+ priority: 50
+ rules:
+ operator: OR
+ conditions:
+ - type: embedding
+ name: business_analysis
+ - type: domain
+ name: economics
+ modelRefs:
+ - model: qwen-2.5-72b
+ use_reasoning: false
+ plugins:
+ - type: semantic-cache
+ configuration:
+ enabled: true
+ similarity_threshold: 0.85
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-32b:
+ pricing:
+ prompt_per_1m: 2
+ completion_per_1m: 4
+ reasoning_family: qwen3
+ loras:
+ - name: enterprise-assistant
+ description: Enterprise assistant
+ qwen-2.5-72b:
+ pricing:
+ prompt_per_1m: 3
+ completion_per_1m: 6
+ reasoning_family: qwen3
+ loras:
+ - name: enterprise-specialist
+ description: Enterprise domain specialist
+ - name: compliance-expert
+ description: Compliance and security expert
+default_model: qwen-2.5-32b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/k8s/testdata/output/16-keyword-embedding-domain-no-plugin.yaml b/src/semantic-router/pkg/k8s/testdata/output/16-keyword-embedding-domain-no-plugin.yaml
new file mode 100644
index 000000000..ef8f84655
--- /dev/null
+++ b/src/semantic-router/pkg/k8s/testdata/output/16-keyword-embedding-domain-no-plugin.yaml
@@ -0,0 +1,192 @@
+config_source: kubernetes
+embedding_models:
+ qwen3_model_path: models/Qwen3-Embedding-0.6B
+ gemma_model_path: models/embeddinggemma-300m
+ use_cpu: true
+bert_model:
+ model_id: models/all-MiniLM-L12-v2
+ threshold: 0.6
+ use_cpu: true
+classifier:
+ category_model:
+ model_id: models/category_classifier_modernbert-base_model
+ threshold: 0.6
+ use_cpu: true
+ use_modernbert: true
+ category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
+ pii_model:
+ model_id: models/pii_classifier_modernbert-base_presidio_token_model
+ threshold: 0.7
+ use_cpu: true
+ pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
+prompt_guard:
+ enabled: true
+ model_id: models/jailbreak_classifier_modernbert-base_model
+ threshold: 0.7
+ use_cpu: true
+ use_modernbert: true
+ jailbreak_mapping_path: models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json
+semantic_cache:
+ backend_type: memory
+ enabled: true
+ similarity_threshold: 0.8
+ max_entries: 1000
+ ttl_seconds: 3600
+ eviction_policy: fifo
+ embedding_model: bert
+observability:
+ tracing:
+ enabled: false
+ provider: opentelemetry
+ exporter:
+ type: otlp
+ endpoint: jaeger:4317
+ insecure: true
+ sampling:
+ type: always_on
+ rate: 1
+ resource:
+ service_name: vllm-semantic-router
+ service_version: v0.1.0
+ deployment_environment: development
+api:
+ batch_classification:
+ metrics:
+ sample_rate: 1
+ duration_buckets:
+ - 0.001
+ - 0.005
+ - 0.01
+ - 0.025
+ - 0.05
+ - 0.1
+ - 0.25
+ - 0.5
+ - 1
+ - 2.5
+ - 5
+ - 10
+ - 30
+ size_buckets:
+ - 1
+ - 2
+ - 5
+ - 10
+ - 20
+ - 50
+ - 100
+ - 200
+ enabled: true
+ detailed_goroutine_tracking: true
+clear_route_cache: false
+keyword_rules:
+ - name: tutorial
+ operator: OR
+ keywords:
+ - tutorial
+ - guide
+ - how-to
+ - learn
+ case_sensitive: false
+embedding_rules:
+ - name: learning_intent
+ threshold: 0.72
+ candidates:
+ - I want to learn about this topic
+ - Can you teach me how to do this?
+ - I'm trying to understand this concept
+ aggregation_method: max
+categories:
+ - name: computer_science
+ description: Computer science and programming
+ mmlu_categories:
+ - computer_science
+ - name: math
+ description: Mathematics and quantitative reasoning
+ mmlu_categories:
+ - math
+ - name: engineering
+ description: Engineering and technical problem-solving
+ mmlu_categories:
+ - engineering
+decisions:
+ - name: cs_tutorial
+ description: Computer science tutorials
+ priority: 100
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: tutorial
+ - type: embedding
+ name: learning_intent
+ - type: domain
+ name: computer_science
+ modelRefs:
+ - model: qwen-2.5-32b
+ use_reasoning: false
+ - name: math_tutorial
+ description: Math tutorials
+ priority: 90
+ rules:
+ operator: AND
+ conditions:
+ - type: keyword
+ name: tutorial
+ - type: embedding
+ name: learning_intent
+ - type: domain
+ name: math
+ modelRefs:
+ - model: qwen-2.5-32b
+ use_reasoning: false
+ - name: general_learning
+ description: General learning queries
+ priority: 50
+ rules:
+ operator: OR
+ conditions:
+ - type: keyword
+ name: tutorial
+ - type: embedding
+ name: learning_intent
+ modelRefs:
+ - model: qwen-2.5-32b
+ use_reasoning: false
+strategy: priority
+default_reasoning_effort: high
+reasoning_families:
+ deepseek:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ gpt:
+ type: reasoning_effort
+ parameter: reasoning_effort
+ qwen3:
+ type: chat_template_kwargs
+ parameter: enable_thinking
+model_config:
+ qwen-2.5-14b:
+ pricing:
+ prompt_per_1m: 1
+ completion_per_1m: 2
+ reasoning_family: qwen3
+ loras:
+ - name: general-assistant
+ description: General purpose assistant
+ qwen-2.5-32b:
+ pricing:
+ prompt_per_1m: 2
+ completion_per_1m: 4
+ reasoning_family: qwen3
+ loras:
+ - name: advanced-assistant
+ description: Advanced general purpose assistant
+default_model: qwen-2.5-14b
+vllm_endpoints: []
+tools:
+ enabled: true
+ top_k: 3
+ similarity_threshold: 0.2
+ tools_db_path: config/tools_db.json
+ fallback_to_empty: true
diff --git a/src/semantic-router/pkg/utils/pii/policy.go b/src/semantic-router/pkg/utils/pii/policy.go
index 998f21149..8260c95ea 100644
--- a/src/semantic-router/pkg/utils/pii/policy.go
+++ b/src/semantic-router/pkg/utils/pii/policy.go
@@ -7,66 +7,55 @@ import (
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
)
-// PolicyChecker handles PII policy validation
+// PolicyChecker handles PII policy validation based on decisions
type PolicyChecker struct {
- Config *config.RouterConfig
- ModelConfigs map[string]config.ModelParams
+ Config *config.RouterConfig
}
-// IsPIIEnabled checks if PII detection is enabled and properly configured
-// For LoRA adapters, it falls back to the base model's PII policy if not found
-func (c *PolicyChecker) IsPIIEnabled(model string) bool {
- modelConfig, exists := c.ModelConfigs[model]
- if !exists {
- // Try to find base model for LoRA adapters
- baseModel := c.findBaseModelForLoRA(model)
- if baseModel != "" {
- logging.Infof("LoRA adapter '%s' not found in model configs, falling back to base model '%s'", model, baseModel)
- modelConfig, exists = c.ModelConfigs[baseModel]
- }
+// IsPIIEnabled checks if PII detection is enabled for a given decision
+func (c *PolicyChecker) IsPIIEnabled(decisionName string) bool {
+ if decisionName == "" {
+ logging.Infof("No decision specified, PII detection disabled")
+ return false
+ }
+
+ decision := c.Config.GetDecisionByName(decisionName)
+ if decision == nil {
+ logging.Infof("Decision %s not found, PII detection disabled", decisionName)
+ return false
}
- if !exists {
- logging.Infof("No PII policy found for model %s, allowing request", model)
+ piiConfig := decision.GetPIIConfig()
+ if piiConfig == nil {
+ logging.Infof("No PII config found for decision %s, PII detection disabled", decisionName)
return false
}
- // if it is allowed by default, then it is not enabled
- return !modelConfig.PIIPolicy.AllowByDefault
+
+ // PII detection is enabled if the plugin is enabled
+ return piiConfig.Enabled
}
// NewPolicyChecker creates a new PII policy checker
-func NewPolicyChecker(cfg *config.RouterConfig, modelConfigs map[string]config.ModelParams) *PolicyChecker {
+func NewPolicyChecker(cfg *config.RouterConfig) *PolicyChecker {
return &PolicyChecker{
- Config: cfg,
- ModelConfigs: modelConfigs,
+ Config: cfg,
}
}
-// CheckPolicy checks if the detected PII types are allowed for the given model
-// For LoRA adapters, it falls back to the base model's PII policy if not found
-func (pc *PolicyChecker) CheckPolicy(model string, detectedPII []string) (bool, []string, error) {
- if !pc.IsPIIEnabled(model) {
- logging.Infof("PII detection is disabled, allowing request")
+// CheckPolicy checks if the detected PII types are allowed for the given decision
+func (pc *PolicyChecker) CheckPolicy(decisionName string, detectedPII []string) (bool, []string, error) {
+ if !pc.IsPIIEnabled(decisionName) {
+ logging.Infof("PII detection is disabled for decision %s, allowing request", decisionName)
return true, nil, nil
}
- modelConfig, exists := pc.ModelConfigs[model]
- if !exists {
- // Try to find base model for LoRA adapters
- baseModel := pc.findBaseModelForLoRA(model)
- if baseModel != "" {
- logging.Infof("LoRA adapter '%s' not found in model configs, falling back to base model '%s' for PII policy", model, baseModel)
- modelConfig, exists = pc.ModelConfigs[baseModel]
- }
- }
-
- if !exists {
- // If no specific config, allow by default
- logging.Infof("No PII policy found for model %s, allowing request", model)
+ decision := pc.Config.GetDecisionByName(decisionName)
+ if decision == nil {
+ logging.Infof("Decision %s not found, allowing request", decisionName)
return true, nil, nil
}
- policy := modelConfig.PIIPolicy
+ policy := decision.GetDecisionPIIPolicy()
var deniedPII []string
for _, piiType := range detectedPII {
@@ -87,32 +76,14 @@ func (pc *PolicyChecker) CheckPolicy(model string, detectedPII []string) (bool,
}
if len(deniedPII) > 0 {
- logging.Warnf("PII policy violation for model %s: denied PII types %v", model, deniedPII)
+ logging.Warnf("PII policy violation for decision %s: denied PII types %v", decisionName, deniedPII)
return false, deniedPII, nil
}
- logging.Infof("PII policy check passed for model %s", model)
+ logging.Infof("PII policy check passed for decision %s", decisionName)
return true, nil, nil
}
-// FilterModelsForPII filters the list of candidate models based on PII policy compliance
-func (pc *PolicyChecker) FilterModelsForPII(candidateModels []string, detectedPII []string) []string {
- var allowedModels []string
-
- for _, model := range candidateModels {
- allowed, _, err := pc.CheckPolicy(model, detectedPII)
- if err != nil {
- logging.Errorf("Error checking PII policy for model %s: %v", model, err)
- continue
- }
- if allowed {
- allowedModels = append(allowedModels, model)
- }
- }
-
- return allowedModels
-}
-
// ExtractAllContent extracts all content from user and non-user messages for PII analysis
func ExtractAllContent(userContent string, nonUserMessages []string) []string {
var allContent []string
@@ -122,17 +93,3 @@ func ExtractAllContent(userContent string, nonUserMessages []string) []string {
allContent = append(allContent, nonUserMessages...)
return allContent
}
-
-// findBaseModelForLoRA finds the base model for a given LoRA adapter name
-// Returns empty string if the LoRA adapter is not found in any model's LoRA list
-func (pc *PolicyChecker) findBaseModelForLoRA(loraName string) string {
- for modelName, modelConfig := range pc.ModelConfigs {
- for _, lora := range modelConfig.LoRAs {
- if lora.Name == loraName {
- logging.Debugf("Found base model '%s' for LoRA adapter '%s'", modelName, loraName)
- return modelName
- }
- }
- }
- return ""
-}
diff --git a/src/semantic-router/pkg/utils/pii/policy_test.go b/src/semantic-router/pkg/utils/pii/policy_test.go
deleted file mode 100644
index 178b9f133..000000000
--- a/src/semantic-router/pkg/utils/pii/policy_test.go
+++ /dev/null
@@ -1,167 +0,0 @@
-package pii
-
-import (
- "testing"
-
- "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
-)
-
-// TestIsPIIEnabled_LoRAFallback tests that LoRA adapters fall back to base model's PII policy
-func TestIsPIIEnabled_LoRAFallback(t *testing.T) {
- tests := []struct {
- name string
- modelConfigs map[string]config.ModelParams
- model string
- expectedResult bool
- description string
- }{
- {
- name: "LoRA adapter inherits base model PII policy (enabled)",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false, // PII policy enabled
- },
- LoRAs: []config.LoRAAdapter{
- {Name: "science-expert"},
- {Name: "humanities-expert"},
- },
- },
- },
- model: "humanities-expert",
- expectedResult: true, // Should inherit base model's policy (enabled)
- description: "LoRA adapter should inherit base model's PII policy when not explicitly configured",
- },
- {
- name: "LoRA adapter inherits base model PII policy (disabled)",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: true, // PII policy disabled
- },
- LoRAs: []config.LoRAAdapter{
- {Name: "general-expert"},
- },
- },
- },
- model: "general-expert",
- expectedResult: false, // Should inherit base model's policy (disabled)
- description: "LoRA adapter should inherit base model's disabled PII policy",
- },
- {
- name: "Base model PII policy check",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false,
- },
- },
- },
- model: "base-model",
- expectedResult: true,
- description: "Base model should use its own PII policy",
- },
- {
- name: "Unknown model without LoRA mapping",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false,
- },
- },
- },
- model: "unknown-model",
- expectedResult: false,
- description: "Unknown model should return false (no policy found)",
- },
- {
- name: "LoRA adapter with explicit PII policy overrides base model",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false, // Base model has strict policy
- },
- LoRAs: []config.LoRAAdapter{
- {Name: "permissive-lora"},
- },
- },
- "permissive-lora": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: true, // LoRA has permissive policy
- },
- },
- },
- model: "permissive-lora",
- expectedResult: false, // Should use LoRA's own policy (disabled)
- description: "LoRA adapter with explicit policy should not fall back to base model",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- checker := &PolicyChecker{
- ModelConfigs: tt.modelConfigs,
- }
-
- result := checker.IsPIIEnabled(tt.model)
-
- if result != tt.expectedResult {
- t.Errorf("%s: expected %v, got %v", tt.description, tt.expectedResult, result)
- }
- })
- }
-}
-
-// TestCheckPolicy_LoRAFallback tests that CheckPolicy falls back to base model for LoRA adapters
-func TestCheckPolicy_LoRAFallback(t *testing.T) {
- tests := []struct {
- name string
- modelConfigs map[string]config.ModelParams
- model string
- detectedPII []string
- expectedAllowed bool
- expectedDenied []string
- description string
- }{
- {
- name: "LoRA adapter inherits base model's strict PII policy",
- modelConfigs: map[string]config.ModelParams{
- "base-model": {
- PIIPolicy: config.PIIPolicy{
- AllowByDefault: false,
- PIITypes: []string{"GPE"}, // Only allow GPE
- },
- LoRAs: []config.LoRAAdapter{
- {Name: "science-expert"},
- },
- },
- },
- model: "science-expert",
- detectedPII: []string{"EMAIL_ADDRESS", "CREDIT_CARD"},
- expectedAllowed: false,
- expectedDenied: []string{"EMAIL_ADDRESS", "CREDIT_CARD"},
- description: "LoRA should inherit base model's strict policy and block non-allowed PII",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- checker := &PolicyChecker{
- ModelConfigs: tt.modelConfigs,
- }
-
- allowed, deniedPII, err := checker.CheckPolicy(tt.model, tt.detectedPII)
- if err != nil {
- t.Errorf("Unexpected error: %v", err)
- }
-
- if allowed != tt.expectedAllowed {
- t.Errorf("%s: expected allowed=%v, got %v", tt.description, tt.expectedAllowed, allowed)
- }
-
- if len(deniedPII) != len(tt.expectedDenied) {
- t.Errorf("%s: expected denied PII %v, got %v", tt.description, tt.expectedDenied, deniedPII)
- }
- })
- }
-}
diff --git a/test_file.txt b/test_file.txt
new file mode 100644
index 000000000..9daeafb98
--- /dev/null
+++ b/test_file.txt
@@ -0,0 +1 @@
+test
diff --git a/tools/linter/yaml/.yamllint b/tools/linter/yaml/.yamllint
index 0bbec0533..550788fbd 100644
--- a/tools/linter/yaml/.yamllint
+++ b/tools/linter/yaml/.yamllint
@@ -6,6 +6,7 @@ ignore: |
.git
.github
node_modules
+ .crd-ref-docs.yaml
deploy/helm
.venv
diff --git a/tools/make/docs.mk b/tools/make/docs.mk
index 9c48aa3bf..72aeb880f 100644
--- a/tools/make/docs.mk
+++ b/tools/make/docs.mk
@@ -32,3 +32,66 @@ docs-lint-fix: ## Fix lint issues in documentation website source files
@$(LOG_TARGET)
cd website && npm run lint:fix
+##@ CRD Documentation
+
+CRD_REF_DOCS_VERSION ?= latest
+CRD_REF_DOCS := $(shell command -v crd-ref-docs 2> /dev/null)
+
+.PHONY: install-crd-ref-docs
+install-crd-ref-docs: ## Install crd-ref-docs tool
+ @$(LOG_TARGET)
+ @if [ -z "$(CRD_REF_DOCS)" ]; then \
+ echo "Installing crd-ref-docs..."; \
+ go install github.com/elastic/crd-ref-docs@$(CRD_REF_DOCS_VERSION); \
+ else \
+ echo "crd-ref-docs is already installed at $(CRD_REF_DOCS)"; \
+ fi
+
+.PHONY: docs-crd
+docs-crd: install-crd-ref-docs markdown-lint-fix ## Generate CRD API reference documentation
+ @$(LOG_TARGET)
+ @echo "Generating CRD documentation from Go API types..."
+ @if [ -d "src/semantic-router/pkg/apis/vllm.ai/v1alpha1" ]; then \
+ crd-ref-docs \
+ --source-path=./src/semantic-router/pkg/apis/vllm.ai/v1alpha1 \
+ --config=.crd-ref-docs.yaml \
+ --renderer=markdown \
+ --output-path=./website/docs/api/crd-reference.md; \
+ echo "β
CRD documentation generated at website/docs/api/crd-reference.md"; \
+ else \
+ echo "β οΈ API directory not found, generating from CRD YAML files..."; \
+ crd-ref-docs \
+ --source-path=./deploy/kubernetes/crds \
+ --renderer=markdown \
+ --output-path=./website/docs/api/crd-reference.md; \
+ echo "β
CRD documentation generated from YAML at website/docs/api/crd-reference.md"; \
+ fi
+ @echo "π Adding Docusaurus frontmatter..."
+ @if ! grep -q "^---" website/docs/api/crd-reference.md; then \
+ echo "---" > website/docs/api/crd-reference.md.tmp; \
+ echo "sidebar_position: 3" >> website/docs/api/crd-reference.md.tmp; \
+ echo "title: CRD API Reference" >> website/docs/api/crd-reference.md.tmp; \
+ echo "description: Kubernetes Custom Resource Definitions (CRDs) API reference for vLLM Semantic Router" >> website/docs/api/crd-reference.md.tmp; \
+ echo "---" >> website/docs/api/crd-reference.md.tmp; \
+ echo "" >> website/docs/api/crd-reference.md.tmp; \
+ cat website/docs/api/crd-reference.md >> website/docs/api/crd-reference.md.tmp; \
+ mv website/docs/api/crd-reference.md.tmp website/docs/api/crd-reference.md; \
+ echo "β
Frontmatter added"; \
+ else \
+ echo "β
Frontmatter already exists"; \
+ fi
+
+.PHONY: docs-crd-watch
+docs-crd-watch: ## Watch for CRD changes and regenerate documentation
+ @$(LOG_TARGET)
+ @echo "Watching for CRD changes..."
+ @while true; do \
+ $(MAKE) docs-crd; \
+ sleep 5; \
+ done
+
+.PHONY: docs-all
+docs-all: docs-crd docs-build ## Generate all documentation (CRD + website)
+ @$(LOG_TARGET)
+ @echo "β
All documentation generated successfully"
+
diff --git a/tools/make/golang.mk b/tools/make/golang.mk
index 36f2be0af..075179c32 100644
--- a/tools/make/golang.mk
+++ b/tools/make/golang.mk
@@ -46,6 +46,11 @@ install-controller-gen: ## Install controller-gen for code generation
generate-crd: install-controller-gen ## Generate CRD manifests using controller-gen
@echo "Generating CRD manifests..."
@cd src/semantic-router && controller-gen crd:crdVersions=v1,allowDangerousTypes=true paths=./pkg/apis/vllm.ai/v1alpha1 output:crd:artifacts:config=../../deploy/kubernetes/crds
+ @echo "Copying CRDs to Helm chart..."
+ @mkdir -p deploy/helm/semantic-router/crds
+ @cp deploy/kubernetes/crds/vllm.ai_intelligentpools.yaml deploy/helm/semantic-router/crds/
+ @cp deploy/kubernetes/crds/vllm.ai_intelligentroutes.yaml deploy/helm/semantic-router/crds/
+ @echo "✅ CRDs generated and copied to Helm chart"
generate-deepcopy: install-controller-gen ## Generate deepcopy methods using controller-gen
@echo "Generating deepcopy methods..."
diff --git a/tools/make/linter.mk b/tools/make/linter.mk
index 249d54dac..e03cce809 100644
--- a/tools/make/linter.mk
+++ b/tools/make/linter.mk
@@ -10,6 +10,7 @@ markdown-lint: ## Lint all markdown files in the project
--ignore node_modules \
--ignore website/node_modules \
--ignore dashboard/frontend/node_modules \
+ --ignore website/docs/api/crd-reference.md \
--ignore models
markdown-lint-fix: ## Auto-fix markdown lint issues
diff --git a/website/docs/api/classification.md b/website/docs/api/classification.md
index 5de6c69cc..584010c98 100644
--- a/website/docs/api/classification.md
+++ b/website/docs/api/classification.md
@@ -1,4 +1,4 @@
-# Classification API Reference
+# Admin API Reference
The Classification API provides direct access to the Semantic Router's classification models for intent detection, PII identification, and security analysis. This API is useful for testing, debugging, and standalone classification tasks.
@@ -581,23 +581,51 @@ categories:
- name: tech
# Map generic "tech" to multiple MMLU-Pro categories
mmlu_categories: ["computer science", "engineering"]
- model_scores:
- - model: phi4
- score: 0.9
- - model: mistral-small3.1
- score: 0.7
- name: finance
# Map generic "finance" to MMLU economics
mmlu_categories: ["economics"]
- model_scores:
- - model: gemma3:27b
- score: 0.8
- name: politics
# If mmlu_categories is omitted and the name matches an MMLU category,
# the router falls back to identity mapping automatically.
- model_scores:
+
+decisions:
+ - name: tech
+ description: "Route technical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "tech"
+ modelRefs:
+ - model: phi4
+ use_reasoning: false
+ - model: mistral-small3.1
+ use_reasoning: false
+
+ - name: finance
+ description: "Route finance queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "finance"
+ modelRefs:
- model: gemma3:27b
- score: 0.6
+ use_reasoning: false
+
+ - name: politics
+ description: "Route politics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "politics"
+ modelRefs:
+ - model: gemma3:27b
+ use_reasoning: false
```
Notes:
@@ -626,16 +654,28 @@ Notes:
{
"name": "business",
"description": "Business and commercial content",
- "reasoning_enabled": false,
"threshold": 0.6
},
{
"name": "math",
"description": "Mathematical problems and concepts",
- "reasoning_enabled": true,
"threshold": 0.6
}
],
+ "decisions": [
+ {
+ "name": "business",
+ "description": "Route business queries",
+ "priority": 10,
+ "reasoning_enabled": false
+ },
+ {
+ "name": "math",
+ "description": "Route mathematical queries",
+ "priority": 10,
+ "reasoning_enabled": true
+ }
+ ],
"pii_types": [
"PERSON",
"EMAIL",
diff --git a/website/docs/api/crd-reference.md b/website/docs/api/crd-reference.md
new file mode 100644
index 000000000..c28d9f697
--- /dev/null
+++ b/website/docs/api/crd-reference.md
@@ -0,0 +1,409 @@
+---
+sidebar_position: 3
+title: CRD API Reference
+description: Kubernetes Custom Resource Definitions (CRDs) API reference for vLLM Semantic Router
+---
+
+# API Reference
+
+## Packages
+- [vllm.ai/v1alpha1](#vllmaiv1alpha1)
+
+
+## vllm.ai/v1alpha1
+
+Package v1alpha1 contains API Schema definitions for the v1alpha1 API group
+
+### Resource Types
+- [IntelligentPool](#intelligentpool)
+- [IntelligentPoolList](#intelligentpoollist)
+- [IntelligentRoute](#intelligentroute)
+- [IntelligentRouteList](#intelligentroutelist)
+
+
+
+#### Decision
+
+
+
+Decision defines a routing decision based on rule combinations
+
+
+
+_Appears in:_
+- [IntelligentRouteSpec](#intelligentroutespec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this decision | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `priority` _integer_ | Priority defines the priority of this decision (higher values = higher priority) <br />Used when strategy is "priority" | 0 | Maximum: 1000 <br />Minimum: 0 <br /> |
+| `description` _string_ | Description provides a human-readable description of this decision | | MaxLength: 500 <br /> |
+| `signals` _[SignalCombination](#signalcombination)_ | Signals defines the signal combination logic | | Required: \{\} <br /> |
+| `modelRefs` _[ModelRef](#modelref) array_ | ModelRefs defines the model references for this decision (currently only one model is supported) | | MaxItems: 1 <br />MinItems: 1 <br />Required: \{\} <br /> |
+| `plugins` _[DecisionPlugin](#decisionplugin) array_ | Plugins defines the plugins to apply for this decision | | MaxItems: 10 <br /> |
+
+
+#### DecisionPlugin
+
+
+
+DecisionPlugin defines a plugin configuration for a decision
+
+
+
+_Appears in:_
+- [Decision](#decision)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `type` _string_ | Type is the plugin type (semantic-cache, jailbreak, pii, system_prompt, header_mutation) | | Enum: [semantic-cache jailbreak pii system_prompt header_mutation] <br />Required: \{\} <br /> |
+| `configuration` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#rawextension-runtime-pkg)_ | Configuration is the plugin-specific configuration as a raw JSON object | | Schemaless: \{\} <br /> |
+
+
+#### DomainSignal
+
+
+
+DomainSignal defines a domain category for classification
+
+
+
+_Appears in:_
+- [Signals](#signals)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this domain | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `description` _string_ | Description provides a human-readable description of this domain | | MaxLength: 500 <br /> |
+
+
+#### EmbeddingSignal
+
+
+
+EmbeddingSignal defines an embedding-based signal extraction rule
+
+
+
+_Appears in:_
+- [Signals](#signals)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this signal | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `threshold` _float_ | Threshold is the similarity threshold for matching (0.0-1.0) | | Maximum: 1 <br />Minimum: 0 <br />Required: \{\} <br /> |
+| `candidates` _string array_ | Candidates is the list of candidate phrases for semantic matching | | MaxItems: 100 <br />MinItems: 1 <br />Required: \{\} <br /> |
+| `aggregationMethod` _string_ | AggregationMethod defines how to aggregate multiple candidate similarities | max | Enum: [mean max any] <br /> |
+
+
+#### IntelligentPool
+
+
+
+IntelligentPool defines a pool of models with their configurations
+
+
+
+_Appears in:_
+- [IntelligentPoolList](#intelligentpoollist)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `vllm.ai/v1alpha1` | | |
+| `kind` _string_ | `IntelligentPool` | | |
+| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `spec` _[IntelligentPoolSpec](#intelligentpoolspec)_ | | | |
+| `status` _[IntelligentPoolStatus](#intelligentpoolstatus)_ | | | |
+
+
+#### IntelligentPoolList
+
+
+
+IntelligentPoolList contains a list of IntelligentPool
+
+
+
+
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `vllm.ai/v1alpha1` | | |
+| `kind` _string_ | `IntelligentPoolList` | | |
+| `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `items` _[IntelligentPool](#intelligentpool) array_ | | | |
+
+
+#### IntelligentPoolSpec
+
+
+
+IntelligentPoolSpec defines the desired state of IntelligentPool
+
+
+
+_Appears in:_
+- [IntelligentPool](#intelligentpool)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `defaultModel` _string_ | DefaultModel specifies the default model to use when no specific model is selected | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `models` _[ModelConfig](#modelconfig) array_ | Models defines the list of available models in this pool | | MaxItems: 100 <br />MinItems: 1 <br />Required: \{\} <br /> |
+
+
+#### IntelligentPoolStatus
+
+
+
+IntelligentPoolStatus defines the observed state of IntelligentPool
+
+
+
+_Appears in:_
+- [IntelligentPool](#intelligentpool)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#condition-v1-meta) array_ | Conditions represent the latest available observations of the IntelligentPool's state | | |
+| `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed IntelligentPool | | |
+| `modelCount` _integer_ | ModelCount indicates the number of models in the pool | | |
+
+
+#### IntelligentRoute
+
+
+
+IntelligentRoute defines intelligent routing rules and decisions
+
+
+
+_Appears in:_
+- [IntelligentRouteList](#intelligentroutelist)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `vllm.ai/v1alpha1` | | |
+| `kind` _string_ | `IntelligentRoute` | | |
+| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `spec` _[IntelligentRouteSpec](#intelligentroutespec)_ | | | |
+| `status` _[IntelligentRouteStatus](#intelligentroutestatus)_ | | | |
+
+
+#### IntelligentRouteList
+
+
+
+IntelligentRouteList contains a list of IntelligentRoute
+
+
+
+
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `vllm.ai/v1alpha1` | | |
+| `kind` _string_ | `IntelligentRouteList` | | |
+| `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `items` _[IntelligentRoute](#intelligentroute) array_ | | | |
+
+
+#### IntelligentRouteSpec
+
+
+
+IntelligentRouteSpec defines the desired state of IntelligentRoute
+
+
+
+_Appears in:_
+- [IntelligentRoute](#intelligentroute)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `signals` _[Signals](#signals)_ | Signals defines signal extraction rules for routing decisions | | |
+| `decisions` _[Decision](#decision) array_ | Decisions defines the routing decisions based on signal combinations | | MaxItems: 100 <br />MinItems: 1 <br />Required: \{\} <br /> |
+
+
+#### IntelligentRouteStatus
+
+
+
+IntelligentRouteStatus defines the observed state of IntelligentRoute
+
+
+
+_Appears in:_
+- [IntelligentRoute](#intelligentroute)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#condition-v1-meta) array_ | Conditions represent the latest available observations of the IntelligentRoute's state | | |
+| `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed IntelligentRoute | | |
+| `statistics` _[RouteStatistics](#routestatistics)_ | Statistics provides statistics about configured decisions and signals | | |
+
+
+#### KeywordSignal
+
+
+
+KeywordSignal defines a keyword-based signal extraction rule
+
+
+
+_Appears in:_
+- [Signals](#signals)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this rule (also used as category name) | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `operator` _string_ | Operator defines the logical operator for keywords (AND/OR) | | Enum: [AND OR] <br />Required: \{\} <br /> |
+| `keywords` _string array_ | Keywords is the list of keywords to match | | MaxItems: 100 <br />MinItems: 1 <br />Required: \{\} <br /> |
+| `caseSensitive` _boolean_ | CaseSensitive specifies whether keyword matching is case-sensitive | false | |
+
+
+#### LoRAConfig
+
+
+
+LoRAConfig defines a LoRA adapter configuration
+
+
+
+_Appears in:_
+- [ModelConfig](#modelconfig)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this LoRA adapter | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `description` _string_ | Description provides a human-readable description of this LoRA adapter | | MaxLength: 500 <br /> |
+
+
+#### ModelConfig
+
+
+
+ModelConfig defines the configuration for a single model
+
+
+
+_Appears in:_
+- [IntelligentPoolSpec](#intelligentpoolspec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the unique identifier for this model | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `reasoningFamily` _string_ | ReasoningFamily specifies the reasoning syntax family (e.g., "qwen3", "deepseek") <br />Must be defined in the global static configuration's ReasoningFamilies | | MaxLength: 50 <br /> |
+| `pricing` _[ModelPricing](#modelpricing)_ | Pricing defines the cost structure for this model | | |
+| `loras` _[LoRAConfig](#loraconfig) array_ | LoRAs defines the list of LoRA adapters available for this model | | MaxItems: 50 <br /> |
+
+
+#### ModelPricing
+
+
+
+ModelPricing defines the pricing structure for a model
+
+
+
+_Appears in:_
+- [ModelConfig](#modelconfig)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `inputTokenPrice` _float_ | InputTokenPrice is the cost per input token | | Minimum: 0 <br /> |
+| `outputTokenPrice` _float_ | OutputTokenPrice is the cost per output token | | Minimum: 0 <br /> |
+
+
+#### ModelRef
+
+
+
+ModelRef defines a model reference without score
+
+
+
+_Appears in:_
+- [Decision](#decision)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `model` _string_ | Model is the name of the model (must exist in IntelligentPool) | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+| `loraName` _string_ | LoRAName is the name of the LoRA adapter to use (must exist in the model's LoRAs) | | MaxLength: 100 <br /> |
+| `useReasoning` _boolean_ | UseReasoning specifies whether to enable reasoning mode for this model | false | |
+| `reasoningDescription` _string_ | ReasoningDescription provides context for when to use reasoning | | MaxLength: 500 <br /> |
+| `reasoningEffort` _string_ | ReasoningEffort defines the reasoning effort level (low/medium/high) | | Enum: [low medium high] <br /> |
+
+
+
+
+#### RouteStatistics
+
+
+
+RouteStatistics provides statistics about the IntelligentRoute configuration
+
+
+
+_Appears in:_
+- [IntelligentRouteStatus](#intelligentroutestatus)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `decisions` _integer_ | Decisions indicates the number of decisions | | |
+| `keywords` _integer_ | Keywords indicates the number of keyword signals | | |
+| `embeddings` _integer_ | Embeddings indicates the number of embedding signals | | |
+| `domains` _integer_ | Domains indicates the number of domain signals | | |
+
+
+#### SignalCombination
+
+
+
+SignalCombination defines how to combine multiple signals
+
+
+
+_Appears in:_
+- [Decision](#decision)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `operator` _string_ | Operator defines the logical operator for combining conditions (AND/OR) | | Enum: [AND OR] <br />Required: \{\} <br /> |
+| `conditions` _[SignalCondition](#signalcondition) array_ | Conditions defines the list of signal conditions | | MaxItems: 50 <br />MinItems: 1 <br />Required: \{\} <br /> |
+
+
+#### SignalCondition
+
+
+
+SignalCondition defines a single signal condition
+
+
+
+_Appears in:_
+- [SignalCombination](#signalcombination)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `type` _string_ | Type defines the type of signal (keyword/embedding/domain) | | Enum: [keyword embedding domain] <br />Required: \{\} <br /> |
+| `name` _string_ | Name is the name of the signal to reference | | MaxLength: 100 <br />MinLength: 1 <br />Required: \{\} <br /> |
+
+
+#### Signals
+
+
+
+Signals defines signal extraction rules
+
+
+
+_Appears in:_
+- [IntelligentRouteSpec](#intelligentroutespec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `keywords` _[KeywordSignal](#keywordsignal) array_ | Keywords defines keyword-based signal extraction rules | | MaxItems: 100 <br /> |
+| `embeddings` _[EmbeddingSignal](#embeddingsignal) array_ | Embeddings defines embedding-based signal extraction rules | | MaxItems: 100 <br /> |
+| `domains` _[DomainSignal](#domainsignal) array_ | Domains defines MMLU domain categories for classification | | MaxItems: 14 <br /> |
+
+
diff --git a/website/docs/installation/configuration.md b/website/docs/installation/configuration.md
index a7ca200d8..d39b6dfb6 100644
--- a/website/docs/installation/configuration.md
+++ b/website/docs/installation/configuration.md
@@ -77,26 +77,59 @@ classifier:
# Categories and routing rules
categories:
- name: math
- model_scores:
- - model: your-model
- score: 1.0
- use_reasoning: true # Enable reasoning for math problems
- # Optional: Category-level cache settings
- # semantic_cache_enabled: true
- # semantic_cache_similarity_threshold: 0.9 # Higher threshold for math
- # Optional: Category-level jailbreak settings
- # jailbreak_enabled: true # Override global jailbreak detection
- name: computer science
- model_scores:
- - model: your-model
- score: 1.0
- use_reasoning: true # Enable reasoning for code
- name: other
- model_scores:
- - model: your-model
- score: 0.8
- use_reasoning: false # No reasoning for general queries
- # semantic_cache_similarity_threshold: 0.75 # Lower threshold for general queries
+
+# Decision-based routing
+decisions:
+- name: math
+ description: "Route mathematical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for math problems
+ # Optional: Decision-level plugins
+ # plugins:
+ # - type: "semantic-cache"
+ # configuration:
+ # enabled: true
+ # similarity_threshold: 0.9 # Higher threshold for math
+ # - type: "jailbreak"
+ # configuration:
+ # enabled: true # Override global jailbreak detection
+
+- name: computer science
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for code
+
+- name: other
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: your-model
+ use_reasoning: false # No reasoning for general queries
+ # plugins:
+ # - type: "semantic-cache"
+ # configuration:
+ # similarity_threshold: 0.75 # Lower threshold for general queries
default_model: your-model
@@ -294,48 +327,77 @@ classifier:
### Categories and Routing
-Define how different query types are handled. Each category can have multiple models with individual reasoning settings:
+Define how different query types are handled using the Decision-based routing system:
```yaml
+# Categories define domains for classification
categories:
- name: math
- model_scores:
- - model: your-model
- score: 1.0 # Preference score for this model
- use_reasoning: true # Enable reasoning for this model on math problems
+- name: computer science
+- name: other
+
+# Decisions define routing logic with rules and model selection
+decisions:
+- name: math
+ description: "Route mathematical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for this model on math problems
- name: computer science
- model_scores:
- - model: your-model
- score: 1.0
- use_reasoning: true # Enable reasoning for code
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for code
- name: other
- model_scores:
- - model: your-model
- score: 0.8
- use_reasoning: false # No reasoning for general queries
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: your-model
+ use_reasoning: false # No reasoning for general queries
default_model: your-model # Fallback model
```
### Model-Specific Reasoning
-The `use_reasoning` field is configured per model within each category, allowing fine-grained control:
+The `use_reasoning` field is configured per model within each decision's modelRefs, allowing fine-grained control:
```yaml
-categories:
+decisions:
- name: math
- model_scores:
- - model: gpt-oss-120b
- score: 1.0
- use_reasoning: true # GPT-OSS-120b supports reasoning for math
- - model: phi4
- score: 0.8
- use_reasoning: false # phi4 doesn't support reasoning mode
- - model: deepseek-v31
- score: 0.9
- use_reasoning: true # DeepSeek supports reasoning for math
+ description: "Route mathematical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: gpt-oss-120b
+ use_reasoning: true # GPT-OSS-120b supports reasoning for math
+ - model: phi4
+ use_reasoning: false # phi4 doesn't support reasoning mode
+ - model: deepseek-v31
+ use_reasoning: true # DeepSeek supports reasoning for math
```
### Model Reasoning Configuration
@@ -425,24 +487,36 @@ Set the global default reasoning effort level used when categories don't specify
default_reasoning_effort: "high" # Options: "low", "medium", "high"
```
-**Category-Specific Reasoning Effort:**
-Override the default effort level per category:
+**Decision-Specific Reasoning Effort:**
+Override the default effort level per decision:
```yaml
-categories:
+decisions:
- name: math
+ description: "Route mathematical queries"
+ priority: 10
reasoning_effort: "high" # Use high effort for complex math
- model_scores:
- - model: your-model
- score: 1.0
- use_reasoning: true # Enable reasoning for this model
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for this model
- name: general
+ description: "Route general queries"
+ priority: 5
reasoning_effort: "low" # Use low effort for general queries
- model_scores:
- - model: your-model
- score: 1.0
- use_reasoning: true # Enable reasoning for this model
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "general"
+ modelRefs:
+ - model: your-model
+ use_reasoning: true # Enable reasoning for this model
```
### Security Features
@@ -483,30 +557,59 @@ semantic_cache:
ttl_seconds: 3600 # Cache expiration time
eviction_policy: "fifo" # Options: "fifo", "lru", "lfu"
-# Category-Level Cache Configuration (New)
-# Override global cache settings for specific categories
+# Decision-Level Cache Configuration (New)
+# Override global cache settings for specific decisions
categories:
- name: health
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # Very strict - medical accuracy critical
- model_scores:
+ - name: general_chat
+ - name: troubleshooting
+
+decisions:
+ - name: health
+ description: "Route health queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: your-model
- score: 0.5
use_reasoning: false
-
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95 # Very strict - medical accuracy critical
+
- name: general_chat
- semantic_cache_similarity_threshold: 0.75 # Relaxed for better cache hits
- model_scores:
+ description: "Route general chat queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "general_chat"
+ modelRefs:
- model: your-model
- score: 0.7
use_reasoning: false
-
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ similarity_threshold: 0.75 # Relaxed for better cache hits
+
- name: troubleshooting
- # No cache settings - uses global default (0.8)
- model_scores:
+ description: "Route troubleshooting queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "troubleshooting"
+ modelRefs:
- model: your-model
- score: 0.7
use_reasoning: false
+ # No cache plugin - uses global default (0.8)
# Tool Auto-Selection
tools:
@@ -662,7 +765,7 @@ Different categories have different tolerance for semantic variations:
### Configuration Examples
-#### Example 1: Mixed Thresholds for Different Categories
+#### Example 1: Mixed Thresholds for Different Decisions
```yaml
semantic_cache:
@@ -672,37 +775,93 @@ semantic_cache:
categories:
- name: health
- system_prompt: "You are a health expert..."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # Very strict - "headache" vs "severe headache" = different
- model_scores:
+ - name: psychology
+ - name: general_chat
+ - name: troubleshooting
+
+decisions:
+ - name: health
+ description: "Route health queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: your-model
- score: 0.5
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health expert..."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95 # Very strict - "headache" vs "severe headache" = different
- name: psychology
- system_prompt: "You are a psychology expert..."
- semantic_cache_similarity_threshold: 0.92 # Strict - clinical nuances matter
- model_scores:
+ description: "Route psychology queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "psychology"
+ modelRefs:
- model: your-model
- score: 0.6
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a psychology expert..."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ similarity_threshold: 0.92 # Strict - clinical nuances matter
- name: general_chat
- system_prompt: "You are a helpful assistant..."
- semantic_cache_similarity_threshold: 0.75 # Relaxed - "how's the weather" = "what's the weather"
- model_scores:
+ description: "Route general chat queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "general_chat"
+ modelRefs:
- model: your-model
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a helpful assistant..."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ similarity_threshold: 0.75 # Relaxed - "how's the weather" = "what's the weather"
- name: troubleshooting
- system_prompt: "You are a tech support expert..."
- # No cache settings - uses global threshold of 0.8
- model_scores:
+ description: "Route troubleshooting queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "troubleshooting"
+ modelRefs:
- model: your-model
- score: 0.7
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a tech support expert..."
+ mode: "replace"
+ # No cache plugin - uses global threshold of 0.8
```
#### Example 2: Disable Cache for Sensitive Data
@@ -710,24 +869,41 @@ categories:
```yaml
categories:
- name: personal_data
- system_prompt: "Handle personal information..."
- semantic_cache_enabled: false # Disable cache entirely for privacy
- model_scores:
+
+decisions:
+ - name: personal_data
+ description: "Route personal data queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "personal_data"
+ modelRefs:
- model: your-model
- score: 0.8
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "Handle personal information..."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: false # Disable cache entirely for privacy
```
### Configuration Options
-**Category-Level Fields:**
+**Decision-Level Plugin Fields:**
-- `semantic_cache_enabled` (optional, boolean): Enable/disable caching for this category. If not specified, inherits from global `semantic_cache.enabled`.
-- `semantic_cache_similarity_threshold` (optional, float 0.0-1.0): Minimum similarity score for cache hits in this category. If not specified, inherits from global `semantic_cache.similarity_threshold`.
+- `plugins[].type: "semantic-cache"` - Semantic cache plugin configuration
+ - `configuration.enabled` (optional, boolean): Enable/disable caching for this decision. If not specified, inherits from global `semantic_cache.enabled`.
+ - `configuration.similarity_threshold` (optional, float 0.0-1.0): Minimum similarity score for cache hits in this decision. If not specified, inherits from global `semantic_cache.similarity_threshold`.
**Fallback Hierarchy:**
-1. Category-specific `semantic_cache_similarity_threshold` (if set)
+1. Decision-specific plugin `similarity_threshold` (if set)
2. Global `semantic_cache.similarity_threshold` (if set)
3. `bert_model.threshold` (final fallback)
@@ -741,7 +917,7 @@ categories:
**Privacy and Compliance:**
-- Disable caching (`semantic_cache_enabled: false`) for categories handling:
+- Disable caching (set plugin `enabled: false`) for decisions handling:
- Personal identifiable information (PII)
- Financial data
- Health records
@@ -750,9 +926,9 @@ categories:
**Performance Tuning:**
- Start with conservative (higher) thresholds
-- Monitor cache hit rates per category
-- Lower thresholds for categories with low hit rates
-- Raise thresholds for categories with incorrect cache hits
+- Monitor cache hit rates per decision
+- Lower thresholds for decisions with low hit rates
+- Raise thresholds for decisions with incorrect cache hits
## Common Configuration Examples
@@ -845,15 +1021,32 @@ vllm_endpoints:
categories:
- name: math
- model_scores:
- - model: math-model
- score: 1.0
- use_reasoning: true # Enable reasoning for math
- name: other
- model_scores:
- - model: general-model
- score: 1.0
- use_reasoning: false # No reasoning for general queries
+
+decisions:
+- name: math
+ description: "Route mathematical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: math-model
+ use_reasoning: true # Enable reasoning for math
+
+- name: other
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: general-model
+ use_reasoning: false # No reasoning for general queries
```
**Load Balancing:**
diff --git a/website/docs/training/model-performance-eval.md b/website/docs/training/model-performance-eval.md
index 529c54815..a59d35c45 100644
--- a/website/docs/training/model-performance-eval.md
+++ b/website/docs/training/model-performance-eval.md
@@ -2,7 +2,7 @@
## Why evaluate?
Evaluation makes routing data-driven. By measuring per-category accuracy on MMLU-Pro (and doing a quick sanity check with ARC), you can:
-- Select the right model for each category and rank them into categories.model_scores
+- Select the right model for each decision and configure them in decisions.modelRefs
- Pick a sensible default_model based on overall performance
- Decide when CoT prompting is worth the latency/cost tradeoff
- Catch regressions when models, prompts, or parameters change
@@ -31,9 +31,9 @@ see code in [/src/training/model_eval](https://github.com/vllm-project/semantic-
#### 3) Generate an updated config.yaml
-- Rank models per category into categories.model_scores
+- Create decisions for each category with modelRefs
- Set default_model to the best average performer
-- Keep or apply category-level reasioning settings
+- Keep or apply decision-level reasoning settings
## 1. Prerequisites
@@ -244,18 +244,24 @@ python src/training/model_eval/result_to_config.py \
- Reads all `analysis.json` files, extracting analysis["category_accuracy"]
- Constructs a new config:
- - **categories**: For each category present in results, ranks models by accuracy:
- - **category.model_scores** = `[{ model: "Model_Name", score: 0.87 }, ...]`, highest first
+ - **categories**: Creates simplified category definitions (name only)
+ - **decisions**: For each category present in results, creates a decision with:
+ - **rules**: Domain-based routing conditions
+ - **modelRefs**: Models ranked by accuracy (no score field)
+ - **plugins**: System prompt and other configurations
- **default_model**: the best average performer across categories
- - **category reasoning settings**: auto-filled from a built-in mapping (you can adjust after generation)
- - math, physics, chemistry, CS, engineering -> high reasoning
+ - **decision reasoning settings**: auto-filled from a built-in mapping (you can adjust after generation)
+ - math, physics, chemistry, CS, engineering -> high reasoning
- others default -> low/medium
- Leaves out any special "auto" placeholder models if present
### Schema alignment
-- **categories[].name**: the MMLU-Pro category string
-- **categories[].model_scores**: descending ranking by accuracy for that category
+- **categories[].name**: the MMLU-Pro category string (simplified, no model_scores)
+- **decisions[].name**: matches category name
+- **decisions[].modelRefs**: models ranked by accuracy for that category (no score field)
+- **decisions[].rules**: domain-based routing conditions
+- **decisions[].plugins**: system_prompt and other policy configurations
- **default_model**: a top performer across categories (approach suffix removed, e.g., gemma3:27b from gemma3:27b:direct)
- Keeps other config sections (semantic_cache, tools, classifier, prompt_guard) with reasonable defaults; you can edit them post-generation if your environment differs
@@ -264,7 +270,7 @@ python src/training/model_eval/result_to_config.py \
- This script only works with results from **MMLU_Pro** Evaluation.
- Existing config.yaml can be overwritten. Consider writing to a temp file first and diffing:
- `--output-file config/config.eval.yaml`
-- If your production config.yaml carries **environment-specific settings (endpoints, pricing, policies)**, port the evaluated `categories[].model_scores` and `default_model` back into your canonical config.
+- If your production config.yaml carries **environment-specific settings (endpoints, pricing, policies)**, port the evaluated `decisions[].modelRefs` and `default_model` back into your canonical config.
### Example config.eval.yaml
see more about config at [configuration](https://vllm-semantic-router.com/docs/installation/configuration)
@@ -310,35 +316,75 @@ classifier:
pii_mapping_path: models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json
categories:
- name: business
- use_reasoning: false
- reasoning_description: Business content is typically conversational
+- name: law
+- name: engineering
+
+decisions:
+- name: business
+ description: "Route business queries"
+ priority: 10
reasoning_effort: low
- model_scores:
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: phi4
- score: 0.2
+ use_reasoning: false
- model: qwen3-0.6B
- score: 0.0
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "Business content is typically conversational"
+ mode: "replace"
+
- name: law
- use_reasoning: false
- reasoning_description: Legal content is typically explanatory
+ description: "Route legal queries"
+ priority: 10
reasoning_effort: medium
- model_scores:
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: phi4
- score: 0.8
+ use_reasoning: false
- model: qwen3-0.6B
- score: 0.2
+ use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "Legal content is typically explanatory"
+ mode: "replace"
# Ignore some categories here
- name: engineering
- use_reasoning: true
- reasoning_description: Engineering problems require systematic problem-solving
+ description: "Route engineering queries"
+ priority: 10
reasoning_effort: high
- model_scores:
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "engineering"
+ modelRefs:
- model: phi4
- score: 0.6
+ use_reasoning: true
- model: qwen3-0.6B
- score: 0.2
+ use_reasoning: true
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "Engineering problems require systematic problem-solving"
+ mode: "replace"
+
default_reasoning_effort: medium
default_model: phi4
```
diff --git a/website/docs/tutorials/intelligent-route/domain-routing.md b/website/docs/tutorials/intelligent-route/domain-routing.md
index 2ed3b6536..612ab5d3e 100644
--- a/website/docs/tutorials/intelligent-route/domain-routing.md
+++ b/website/docs/tutorials/intelligent-route/domain-routing.md
@@ -49,48 +49,124 @@ classifier:
categories:
- name: math
- system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
- model_scores:
+ - name: physics
+ - name: computer science
+ - name: business
+ - name: health
+ - name: law
+
+decisions:
+ - name: math
+ description: "Route mathematical queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
- model: qwen3
- score: 1.0
use_reasoning: true
-
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
+ mode: "replace"
+
- name: physics
- system_prompt: "You are a physics expert with deep understanding of physical laws."
- model_scores:
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
- model: qwen3
- score: 0.7
use_reasoning: true
-
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a physics expert with deep understanding of physical laws."
+ mode: "replace"
+
- name: computer science
- system_prompt: "You are a computer science expert with knowledge of algorithms and data structures."
- model_scores:
+ description: "Route computer science queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "computer science"
+ modelRefs:
- model: qwen3
- score: 0.6
use_reasoning: false
-
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a computer science expert with knowledge of algorithms and data structures."
+ mode: "replace"
+
- name: business
- system_prompt: "You are a senior business consultant and strategic advisor."
- model_scores:
+ description: "Route business queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
- model: qwen3
- score: 0.7
use_reasoning: false
-
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a senior business consultant and strategic advisor."
+ mode: "replace"
+
- name: health
- system_prompt: "You are a health and medical information expert."
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95
- model_scores:
+ description: "Route health queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
- model: qwen3
- score: 0.5
use_reasoning: false
-
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a health and medical information expert."
+ mode: "replace"
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95
+
- name: law
- system_prompt: "You are a knowledgeable legal expert."
- model_scores:
+ description: "Route legal queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
- model: qwen3
- score: 0.4
use_reasoning: false
+ plugins:
+ - type: "system_prompt"
+ configuration:
+ enabled: true
+ system_prompt: "You are a knowledgeable legal expert."
+ mode: "replace"
default_model: qwen3
```
@@ -170,38 +246,116 @@ curl -X POST http://localhost:8801/v1/chat/completions \
### STEM Domains (Reasoning Enabled)
```yaml
+decisions:
- name: math
- use_reasoning: true # Step-by-step solutions
- score: 1.0 # Highest priority
+ description: "Route mathematical queries"
+ priority: 10 # Highest priority
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "math"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: true # Step-by-step solutions
+
- name: physics
- use_reasoning: true # Derivations and proofs
- score: 0.7
+ description: "Route physics queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "physics"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: true # Derivations and proofs
+
- name: chemistry
- use_reasoning: true # Reaction mechanisms
- score: 0.6
+ description: "Route chemistry queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "chemistry"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: true # Reaction mechanisms
```
### Professional Domains (PII + Caching)
```yaml
+decisions:
- name: health
- semantic_cache_enabled: true
- semantic_cache_similarity_threshold: 0.95 # Very strict
- pii_detection_enabled: true
+ description: "Route health queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "health"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: false
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ enabled: true
+ similarity_threshold: 0.95 # Very strict
+ - type: "pii"
+ configuration:
+ enabled: true
+
- name: law
- score: 0.4 # Conservative routing
- pii_detection_enabled: true
+ description: "Route legal queries"
+ priority: 5 # Conservative routing
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "law"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: false
+ plugins:
+ - type: "pii"
+ configuration:
+ enabled: true
```
### General Domains (Fast + Cached)
```yaml
+decisions:
- name: business
- use_reasoning: false # Fast responses
- score: 0.7
+ description: "Route business queries"
+ priority: 10
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "business"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: false # Fast responses
+
- name: other
- semantic_cache_similarity_threshold: 0.75 # Relaxed
- score: 0.7
+ description: "Route general queries"
+ priority: 5
+ rules:
+ operator: "OR"
+ conditions:
+ - type: "domain"
+ name: "other"
+ modelRefs:
+ - model: qwen3
+ use_reasoning: false
+ plugins:
+ - type: "semantic-cache"
+ configuration:
+ similarity_threshold: 0.75 # Relaxed
```
## Performance Characteristics
diff --git a/website/docs/tutorials/intelligent-route/embedding-routing.md b/website/docs/tutorials/intelligent-route/embedding-routing.md
index 7c3305002..2c7d0ac98 100644
--- a/website/docs/tutorials/intelligent-route/embedding-routing.md
+++ b/website/docs/tutorials/intelligent-route/embedding-routing.md
@@ -87,7 +87,7 @@ categories:
score: 0.9
use_reasoning: true
jailbreak_enabled: true
- pii_detection_enabled: true
+ pii_enabled: true
- name: product_inquiry
system_prompt: "You are a product specialist."
diff --git a/website/sidebars.ts b/website/sidebars.ts
index ebb6d89fa..36005ff46 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -131,6 +131,7 @@ const sidebars: SidebarsConfig = {
items: [
'api/router',
'api/classification',
+ 'api/crd-reference',
],
},
{