From 500ff78d0390684ca7b109af71e0ce190ff1e68b Mon Sep 17 00:00:00 2001 From: Arthur De Magalhaes Date: Wed, 5 Nov 2025 18:15:06 -0500 Subject: [PATCH 1/3] Adding Knative docs and files Signed-off-by: Arthur De Magalhaes --- deployment/knative/KNATIVE_SCALE_TO_ZERO.md | 284 ++++++++++++++++++ deployment/knative/knative-serving.yaml | 65 ++++ .../knative/mcpgateway-knative-service.yaml | 125 ++++++++ deployment/knative/postgres-config.yaml | 17 ++ 4 files changed, 491 insertions(+) create mode 100644 deployment/knative/KNATIVE_SCALE_TO_ZERO.md create mode 100644 deployment/knative/knative-serving.yaml create mode 100644 deployment/knative/mcpgateway-knative-service.yaml create mode 100644 deployment/knative/postgres-config.yaml diff --git a/deployment/knative/KNATIVE_SCALE_TO_ZERO.md b/deployment/knative/KNATIVE_SCALE_TO_ZERO.md new file mode 100644 index 000000000..fb4f3b474 --- /dev/null +++ b/deployment/knative/KNATIVE_SCALE_TO_ZERO.md @@ -0,0 +1,284 @@ +# Knative Scale-to-Zero Setup for mcpgateway + +## Overview +This document describes the Knative Serving configuration that enables scale-to-zero functionality for the mcpgateway application on Kubernetes clusters (including OpenShift). + +## Prerequisites + +- Kubernetes cluster (1.28+) or OpenShift (4.12+) +- Knative Serving installed ([installation guide](https://knative.dev/docs/install/)) +- kubectl or oc CLI configured + +## Components + +### 1. PostgreSQL Configuration +**File:** [`postgres-config.yaml`](postgres-config.yaml) +**Namespace:** `mcp-gateway` + +ConfigMap containing PostgreSQL connection settings. **Important:** Update these values before deploying: +- `POSTGRES_HOST`: PostgreSQL service hostname +- `POSTGRES_PORT`: PostgreSQL port (default: 5432) +- `POSTGRES_DB`: Database name +- `POSTGRES_USER`: Database username +- `POSTGRES_PASSWORD`: Database password (use Kubernetes Secrets in production) + +### 2. 
KnativeServing Custom Resource +**File:** [`knative-serving.yaml`](knative-serving.yaml) +**Namespace:** `knative-serving` + +This resource configures the Knative Serving platform with: +- **Scale-to-zero enabled**: Pods automatically scale down to 0 when idle +- **30-second grace period**: Pods remain running for 30 seconds after the last request +- **High availability**: 1 replica for control plane components +- **Ingress configuration**: Commented out by default - configure based on your setup +- **Autoscaling parameters**: + - Target concurrency: 100 requests per pod + - Stable window: 60 seconds + - Panic window: 6 seconds + +**Note:** The ingress configuration is commented out. Uncomment and configure based on your ingress controller (Kourier, Istio, or Contour). OpenShift users don't need to configure this as it's handled automatically by the Serverless Operator. + +### 3. Knative Service for mcpgateway +**File:** [`mcpgateway-knative-service.yaml`](mcpgateway-knative-service.yaml) +**Namespace:** `mcp-gateway` + +This replaces the traditional Deployment with a Knative Service that includes: +- **Min scale: 0** - Allows scaling to zero pods +- **Max scale: 1** - Maximum of 1 pod under load (adjust as needed) +- **Container concurrency: 100** - Up to 100 concurrent requests per pod +- **Scale-to-zero retention: 30s** - Keeps pods alive for 30 seconds after traffic stops +- **Health checks**: Readiness and liveness probes for proper traffic routing +- **Database config**: References `postgres-config` ConfigMap for connection settings + +## Deployment Steps + +### 1. 
Install Knative Serving and Ingress Controller + +**For vanilla Kubernetes:** +```bash +# Install Knative Serving +kubectl apply -f https://github.com/knative/serving/releases/download/knative-v1.12.0/serving-crds.yaml +kubectl apply -f https://github.com/knative/serving/releases/download/knative-v1.12.0/serving-core.yaml + +# Install Kourier (recommended lightweight ingress) +kubectl apply -f https://github.com/knative/net-kourier/releases/download/knative-v1.12.0/kourier.yaml + +# Configure Knative to use Kourier +kubectl patch configmap/config-network \ + --namespace knative-serving \ + --type merge \ + --patch '{"data":{"ingress-class":"kourier.ingress.networking.knative.dev"}}' +``` + +**For OpenShift:** +```bash +# Install OpenShift Serverless Operator from OperatorHub +# Then create KnativeServing instance (ingress is auto-configured) +``` + +### 2. Create namespace +```bash +kubectl create namespace mcp-gateway +``` + +### 3. Deploy PostgreSQL configuration +```bash +# Edit postgres-config.yaml with your database credentials first! +kubectl apply -f postgres-config.yaml +``` + +**Security Note:** For production, use Kubernetes Secrets instead of ConfigMap: +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=POSTGRES_PASSWORD=your-secure-password \ + -n mcp-gateway +``` + +Then update the Knative Service to reference the Secret instead of ConfigMap. + +### 4. Deploy Knative Serving configuration (optional) +```bash +# This step is optional - only needed if you want to customize +# autoscaling parameters beyond defaults +kubectl apply -f knative-serving.yaml +``` + +**Note:** For vanilla Kubernetes, you may need to uncomment and configure the `ingress-class` setting in [`knative-serving.yaml`](knative-serving.yaml#L48) to match your installed ingress controller. The file defines a `KnativeServing` resource (`operator.knative.dev/v1beta1`), so applying it requires the Knative Operator (or the OpenShift Serverless Operator); if you installed Knative from the plain YAML manifests in step 1, set the same keys in the `config-autoscaler`, `config-deployment`, `config-network` and `config-observability` ConfigMaps in the `knative-serving` namespace instead. + +### 5. Deploy the mcpgateway service +```bash +kubectl apply -f mcpgateway-knative-service.yaml +``` + +### 6. 
Verify deployment +```bash +# Check service status +kubectl get ksvc mcpgateway -n mcp-gateway + +# Check revisions +kubectl get revision -n mcp-gateway + +# Expected output when idle (scale-to-zero active): +# NAME CONFIG NAME GENERATION READY ACTUAL REPLICAS DESIRED REPLICAS +# mcpgateway-00001 mcpgateway 1 True 0 0 +``` + +## Checking Status + +```bash +# For OpenShift: +$ oc get ksvc mcpgateway -n mcp-gateway + +# For vanilla Kubernetes: +$ kubectl get ksvc mcpgateway -n mcp-gateway + +# Check revisions: +$ kubectl get revision -n mcp-gateway +NAME CONFIG NAME GENERATION READY ACTUAL REPLICAS DESIRED REPLICAS +mcpgateway-00001 mcpgateway 1 True 0 0 +``` + +✅ **Scale-to-zero is active**: The service shows 0 actual and 0 desired replicas when idle. + +## How It Works + +1. **Idle State**: When no traffic is received, Knative scales the pods to 0 after the grace period +2. **Cold Start**: When a request arrives, Knative automatically spins up a pod +3. **Active State**: Pods handle requests and scale based on concurrency +4. **Scale Down**: After 30 seconds of no traffic, pods scale back to 0 + +## Accessing the Service + +The service is accessible via the Knative-managed route. The exact URL depends on your cluster's domain configuration: +- **OpenShift**: `https://mcpgateway-mcp-gateway.apps.<cluster-domain>` +- **Vanilla Kubernetes**: Depends on your ingress configuration and domain setup + +When you make a request: +1. If scaled to zero, there will be a brief cold-start delay (typically 5-15 seconds) +2. The pod will start and handle the request +3. Subsequent requests will be fast while the pod is running +4. 
After 30 seconds of inactivity, the pod will terminate + +## Monitoring Scale-to-Zero + +### Check current pod count: +```bash +kubectl get pods -n mcp-gateway -l serving.knative.dev/service=mcpgateway +``` + +### Watch pods scale up/down: +```bash +kubectl get pods -n mcp-gateway -l serving.knative.dev/service=mcpgateway -w +``` + +### Check revision status: +```bash +kubectl get revision -n mcp-gateway +``` + +### View Knative Service details: +```bash +kubectl describe ksvc mcpgateway -n mcp-gateway +``` + +**Note:** OpenShift users can use `oc` instead of `kubectl` for all commands. + +## Configuration Parameters + +Key autoscaling annotations in the Knative Service: + +| Annotation | Value | Description | +|------------|-------|-------------| +| `autoscaling.knative.dev/min-scale` | `0` | Minimum pods (enables scale-to-zero) | +| `autoscaling.knative.dev/max-scale` | `1` | Maximum pods under load | +| `autoscaling.knative.dev/target` | `100` | Target concurrent requests per pod | +| `autoscaling.knative.dev/scale-to-zero-pod-retention-period` | `30s` | Time to keep pods after last request | +| `autoscaling.knative.dev/metric` | `concurrency` | Metric used for scaling decisions | + +## Troubleshooting + +### Service not scaling to zero +```bash +# Check if there's active traffic +kubectl get podautoscaler -n mcp-gateway + +# Check Knative autoscaler logs +kubectl logs -n knative-serving -l app=autoscaler +``` + +### Cold start taking too long +```bash +# Check pod startup time +kubectl get pods -n mcp-gateway -l serving.knative.dev/service=mcpgateway -w + +# Review readiness probe configuration +kubectl describe ksvc mcpgateway -n mcp-gateway +``` + +### Service not ready +```bash +# Check Knative Service status +kubectl get ksvc mcpgateway -n mcp-gateway -o yaml + +# Check revision status +kubectl describe revision -n mcp-gateway +``` + +## Reverting to Standard Deployment + +If you need to revert to a standard Kubernetes Deployment: + +1. 
Delete the Knative Service: + ```bash + kubectl delete ksvc mcpgateway -n mcp-gateway + ``` + +2. Recreate the original Deployment and Service from your backup or version control + +## Platform-Specific Notes + +### Vanilla Kubernetes +- **Must install Knative Serving and an ingress controller** (Kourier recommended): [Installation Guide](https://knative.dev/docs/install/) +- Configure DNS or use Magic DNS (nip.io/sslip.io) for local development +- Uncomment and set `ingress-class` in [`knative-serving.yaml`](knative-serving.yaml#L48) to match your ingress controller +- Supported ingress controllers: + - **Kourier** (recommended): Lightweight, Knative-specific + - **Istio**: Full service mesh with advanced features + - **Contour**: Envoy-based, good balance of features and performance + +### OpenShift +- **Install OpenShift Serverless Operator** from OperatorHub (includes Knative + Kourier) +- Ingress is automatically configured - no need to modify [`knative-serving.yaml`](knative-serving.yaml#L48) +- OpenShift Routes are automatically created and managed +- Can use `oc` instead of `kubectl` for all commands +- No separate ingress controller installation needed + +## Security Best Practices + +1. **Use Secrets for sensitive data:** + ```bash + kubectl create secret generic postgres-credentials \ + --from-literal=POSTGRES_PASSWORD=secure-password \ + -n mcp-gateway + ``` + +2. **Update the Knative Service to use Secrets:** + ```yaml + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-credentials + key: POSTGRES_PASSWORD + ``` + +3. **Use network policies to restrict database access** +4. **Enable TLS for the Knative Service route** +5. 
**Regularly rotate credentials** + +## Additional Resources + +- [Knative Serving Documentation](https://knative.dev/docs/serving/) +- [Knative Autoscaling](https://knative.dev/docs/serving/autoscaling/) +- [Knative Installation Guide](https://knative.dev/docs/install/) +- [OpenShift Serverless Documentation](https://docs.openshift.com/serverless/) +- [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) \ No newline at end of file diff --git a/deployment/knative/knative-serving.yaml b/deployment/knative/knative-serving.yaml new file mode 100644 index 000000000..491472bcc --- /dev/null +++ b/deployment/knative/knative-serving.yaml @@ -0,0 +1,65 @@ +apiVersion: operator.knative.dev/v1beta1 +kind: KnativeServing +metadata: + name: knative-serving + namespace: knative-serving +spec: + # High availability configuration + high-availability: + replicas: 1 + + # Ingress configuration + # ingress: + # kourier: + # enabled: true + + # Configuration for scale-to-zero + config: + autoscaler: + # Enable scale to zero + enable-scale-to-zero: "true" + # Time window for stable mode (default: 60s) + stable-window: "60s" + # Time window for panic mode (default: 6s) + panic-window: "6s" + # Target concurrency per pod (default: 100) + container-concurrency-target-default: "100" + # Percentage of target to maintain (default: 70) + container-concurrency-target-percentage: "70" + # Upper bound on the wait for scale-from-zero networking to be in place before the last pod is removed (default: 30s) + scale-to-zero-grace-period: "30s" + # Pod retention time after scale to zero decision (default: 0s) + scale-to-zero-pod-retention-period: "0s" + + deployment: + # Progress deadline for deployments + progress-deadline: "600s" + # QPS settings for Kubernetes API + qps-burst: "200" + qps: "100" + + network: + # Ingress class - configure based on your ingress controller: + # - Kourier: "kourier.ingress.networking.knative.dev" + # - Istio: "istio.ingress.networking.knative.dev" + # - Contour: 
"contour.ingress.networking.knative.dev" + # OpenShift: Automatically configured by Serverless Operator + # Comment out or remove if using default ingress + # ingress-class: "kourier.ingress.networking.knative.dev" + + # Domain template for routes + domain-template: "{{.Name}}-{{.Namespace}}.{{.Domain}}" + # Enable HTTP2 + enable-http2: "true" + # Autocreate cluster domain claims + autocreate-cluster-domain-claims: "true" + + observability: + # Enable request logging + logging.enable-request-log: "true" + # Request log template + logging.request-log-template: '{"httpRequest": {"requestMethod": "{{.Request.Method}}", "requestUrl": "{{js .Request.RequestURI}}", "requestSize": "{{.Request.ContentLength}}", "status": {{.Response.Code}}, "responseSize": "{{.Response.Size}}", "userAgent": "{{js .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}", "serverIp": "{{.Revision.PodIP}}", "referer": "{{js .Request.Referer}}", "latency": "{{.Response.Latency}}s", "protocol": "{{.Request.Proto}}"}, "traceId": "{{index .Request.Header "X-B3-Traceid"}}"}' + # Metrics backend + metrics.backend-destination: "prometheus" + # Enable profiling + profiling.enable: "false" \ No newline at end of file diff --git a/deployment/knative/mcpgateway-knative-service.yaml b/deployment/knative/mcpgateway-knative-service.yaml new file mode 100644 index 000000000..877cd2c3d --- /dev/null +++ b/deployment/knative/mcpgateway-knative-service.yaml @@ -0,0 +1,125 @@ +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: mcpgateway + namespace: mcp-gateway +spec: + template: + metadata: + annotations: + # Enable scale to zero + autoscaling.knative.dev/enable-scale-to-zero: "true" + # Scale down to zero after 30 seconds of no traffic + autoscaling.knative.dev/scale-to-zero-pod-retention-period: "30s" + # Minimum number of instances (0 for scale-to-zero) + autoscaling.knative.dev/min-scale: "0" + # Maximum number of instances + autoscaling.knative.dev/max-scale: "1" + # Target 
concurrency per pod + autoscaling.knative.dev/target: "100" + # Metric for autoscaling (concurrency or rps) + autoscaling.knative.dev/metric: "concurrency" + # Window for stable mode + autoscaling.knative.dev/window: "60s" + # Autoscaling class + autoscaling.knative.dev/class: "kpa.autoscaling.knative.dev" + # Target utilization percentage + autoscaling.knative.dev/target-utilization-percentage: "70" + spec: + # Timeout for requests (important for scale-to-zero) + timeoutSeconds: 300 + # Container concurrency (0 = unlimited, or set a specific value) + containerConcurrency: 100 + containers: + - name: gateway + image: ghcr.io/ibm/mcp-context-forge:latest + ports: + - containerPort: 4444 + protocol: TCP + env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "4444" + - name: POSTGRES_HOST + valueFrom: + configMapKeyRef: + name: postgres-config + key: POSTGRES_HOST + optional: true + - name: POSTGRES_PORT + valueFrom: + configMapKeyRef: + name: postgres-config + key: POSTGRES_PORT + optional: true + - name: POSTGRES_DB + valueFrom: + configMapKeyRef: + name: postgres-config + key: POSTGRES_DB + optional: true + - name: POSTGRES_USER + valueFrom: + configMapKeyRef: + name: postgres-config + key: POSTGRES_USER + optional: true + - name: POSTGRES_PASSWORD + valueFrom: + configMapKeyRef: + name: postgres-config + key: POSTGRES_PASSWORD + optional: true + - name: GUNICORN_CMD_ARGS + value: --bind=0.0.0.0:4444 + - name: MCPGATEWAY_UI_ENABLED + value: "true" + - name: MCPGATEWAY_ADMIN_API_ENABLED + value: "true" + envFrom: + - configMapRef: + name: mcpgateway-env + optional: true + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 256Mi + # Readiness probe - critical for Knative to know when pod is ready + readinessProbe: + httpGet: + path: /health + port: 4444 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 + successThreshold: 1 + failureThreshold: 3 + # Liveness probe + livenessProbe: + httpGet: + path: /health + 
port: 4444 + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 5 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /app/data + name: data-volume + volumes: + - name: data-volume + emptyDir: {} \ No newline at end of file diff --git a/deployment/knative/postgres-config.yaml b/deployment/knative/postgres-config.yaml new file mode 100644 index 000000000..733314696 --- /dev/null +++ b/deployment/knative/postgres-config.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-config + namespace: mcp-gateway + labels: + app: postgres +data: + # PostgreSQL connection settings + # IMPORTANT: Change these values for your deployment + POSTGRES_HOST: postgresql + POSTGRES_PORT: "5432" + POSTGRES_DB: mcp + POSTGRES_USER: postgres + POSTGRES_PASSWORD: changeme + +# Made with Bob From a2ff0baa5f1e658a42ecdc6e01c07293b7c282b1 Mon Sep 17 00:00:00 2001 From: Arthur De Magalhaes Date: Wed, 5 Nov 2025 18:16:27 -0500 Subject: [PATCH 2/3] Formatting Signed-off-by: Arthur De Magalhaes --- deployment/knative/postgres-config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/deployment/knative/postgres-config.yaml b/deployment/knative/postgres-config.yaml index 733314696..75e43bf8b 100644 --- a/deployment/knative/postgres-config.yaml +++ b/deployment/knative/postgres-config.yaml @@ -13,5 +13,3 @@ data: POSTGRES_DB: mcp POSTGRES_USER: postgres POSTGRES_PASSWORD: changeme - -# Made with Bob From dd8e15acc53fa91852012364a32fbbeef9288019 Mon Sep 17 00:00:00 2001 From: Arthur De Magalhaes Date: Wed, 5 Nov 2025 20:17:12 -0500 Subject: [PATCH 3/3] Formatting Signed-off-by: Arthur De Magalhaes --- deployment/knative/knative-serving.yaml | 29 ++++++++++++++----- .../knative/mcpgateway-knative-service.yaml | 2 +- 2 files changed, 22 insertions(+), 9 
deletions(-) diff --git a/deployment/knative/knative-serving.yaml b/deployment/knative/knative-serving.yaml index 491472bcc..38d289b2d 100644 --- a/deployment/knative/knative-serving.yaml +++ b/deployment/knative/knative-serving.yaml @@ -7,12 +7,12 @@ spec: # High availability configuration high-availability: replicas: 1 - + # Ingress configuration # ingress: # kourier: # enabled: true - + # Configuration for scale-to-zero config: autoscaler: @@ -30,14 +30,14 @@ spec: scale-to-zero-grace-period: "30s" # Pod retention time after scale to zero decision (default: 0s) scale-to-zero-pod-retention-period: "0s" - + deployment: # Progress deadline for deployments progress-deadline: "600s" # QPS settings for Kubernetes API qps-burst: "200" qps: "100" - + network: # Ingress class - configure based on your ingress controller: # - Kourier: "kourier.ingress.networking.knative.dev" @@ -46,20 +46,33 @@ spec: # OpenShift: Automatically configured by Serverless Operator # Comment out or remove if using default ingress # ingress-class: "kourier.ingress.networking.knative.dev" - + # Domain template for routes domain-template: "{{.Name}}-{{.Namespace}}.{{.Domain}}" # Enable HTTP2 enable-http2: "true" # Autocreate cluster domain claims autocreate-cluster-domain-claims: "true" - + observability: # Enable request logging logging.enable-request-log: "true" # Request log template - logging.request-log-template: '{"httpRequest": {"requestMethod": "{{.Request.Method}}", "requestUrl": "{{js .Request.RequestURI}}", "requestSize": "{{.Request.ContentLength}}", "status": {{.Response.Code}}, "responseSize": "{{.Response.Size}}", "userAgent": "{{js .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}", "serverIp": "{{.Revision.PodIP}}", "referer": "{{js .Request.Referer}}", "latency": "{{.Response.Latency}}s", "protocol": "{{.Request.Proto}}"}, "traceId": "{{index .Request.Header "X-B3-Traceid"}}"}' + logging.request-log-template: >- + {"httpRequest": {"requestMethod": 
"{{.Request.Method}}", + "requestUrl": "{{js .Request.RequestURI}}", + "requestSize": "{{.Request.ContentLength}}", + "status": {{.Response.Code}}, + "responseSize": "{{.Response.Size}}", + "userAgent": "{{js .Request.UserAgent}}", + "remoteIp": "{{js .Request.RemoteAddr}}", + "serverIp": "{{.Revision.PodIP}}", + "referer": "{{js .Request.Referer}}", + "latency": "{{.Response.Latency}}s", + "protocol": "{{.Request.Proto}}"}, + "traceId": "{{index .Request.Header "X-B3-Traceid"}}"} # Metrics backend metrics.backend-destination: "prometheus" # Enable profiling - profiling.enable: "false" \ No newline at end of file + profiling.enable: "false" + diff --git a/deployment/knative/mcpgateway-knative-service.yaml b/deployment/knative/mcpgateway-knative-service.yaml index 877cd2c3d..e2c37e54a 100644 --- a/deployment/knative/mcpgateway-knative-service.yaml +++ b/deployment/knative/mcpgateway-knative-service.yaml @@ -122,4 +122,4 @@ spec: name: data-volume volumes: - name: data-volume - emptyDir: {} \ No newline at end of file + emptyDir: {}