Add baseline and autoscaling load tests; move the autoscaling command from `test` to `load`.

pantierra · pantierra · commit b963ad07f6e6 · 2025-12-04T15:16:24.000+01:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -68,9 +68,6 @@ jobs:
       - name: Run notification tests
         run: ./eoapi-cli test notification
 
-      - name: Run autoscaling tests
-        run: ./eoapi-cli test autoscaling
-
       - name: Debug failed deployment
         if: failure()
         run: ./eoapi-cli deployment debug
diff --git a/eoapi-cli b/eoapi-cli
@@ -58,12 +58,12 @@ EXAMPLES:
     # Run integration tests only
     eoapi-cli test integration
 
-    # Run autoscaling tests only
-    eoapi-cli test autoscaling
-
     # Run load tests
     eoapi-cli load all
 
+    # Run autoscaling load tests only
+    eoapi-cli load autoscaling
+
     # Ingest sample data
     eoapi-cli ingest sample-data
 
diff --git a/scripts/load.sh b/scripts/load.sh
@@ -22,6 +22,7 @@ USAGE:
 COMMANDS:
     baseline        Low load, verify monitoring works
     services        Test each service individually
+    autoscaling     Test HPA scaling under load
     mixed           Realistic scenario
     stress          Find breaking points
     soak            Long-running stability
@@ -41,21 +42,167 @@ EXAMPLES:
     # Test individual services
     $(basename "$0") services --debug
 
+    # Test autoscaling behavior
+    $(basename "$0") autoscaling --debug
+
     # Run all load tests
     $(basename "$0") all
 EOF
 }
 
+get_base_url() {
+    # Try localhost first (most common in local dev)
+    if curl -s -f -m 3 "http://localhost/stac" >/dev/null 2>&1; then
+        echo "http://localhost"
+        return 0
+    fi
+
+    # Try ingress if configured
+    local host
+    host=$(kubectl get ingress -n "$NAMESPACE" -o jsonpath='{.items[0].spec.rules[0].host}' 2>/dev/null || echo "")
+    if [[ -n "$host" ]] && curl -s -f -m 3 "http://$host/stac" >/dev/null 2>&1; then
+        echo "http://$host"
+        return 0
+    fi
+
+    return 1
+}
+
+test_endpoint() {
+    local url="$1"
+    local duration="${2:-30}"
+    local concurrency="${3:-2}"
+
+    if ! command_exists hey; then
+        log_error "hey not found. Install with: go install github.com/rakyll/hey@latest"
+        return 1
+    fi
+
+    log_info "Testing $url (${duration}s, ${concurrency}c)"
+    hey -z "${duration}s" -c "$concurrency" "$url" 2>/dev/null | grep -E "(Total:|Requests/sec:|Average:|Status code)"
+}
+
+monitor_during_test() {
+    local duration="$1"
+    log_info "Monitor with: watch kubectl get pods -n $NAMESPACE"
+    sleep "$duration" &
+    local sleep_pid=$!
+
+    # Show initial state
+    kubectl get hpa -n "$NAMESPACE" 2>/dev/null | head -2 || true
+
+    wait $sleep_pid
+}
+
 load_baseline() {
     log_info "Running baseline load test..."
-    # TODO: Implement baseline load testing
+
+    validate_cluster || return 1
+    validate_namespace "$NAMESPACE" || return 1
+
+    local base_url
+    if ! base_url=$(get_base_url); then
+        log_error "Cannot reach eoAPI endpoints"
+        return 1
+    fi
+    log_info "Using base URL: $base_url"
+
+    # Wait for deployments
+    for service in stac raster vector; do
+        kubectl wait --for=condition=Available deployment/"${RELEASE_NAME}-${service}" -n "$NAMESPACE" --timeout=60s 2>/dev/null || \
+            log_warn "Service $service may not be ready"
+    done
+
+    log_info "Running light load tests..."
+    log_info "Monitor pods: kubectl get pods -n $NAMESPACE -w"
+
+    # STAC collections (30s, 2 concurrent)
+    test_endpoint "$base_url/stac/collections" &
+    monitor_during_test 30
+    wait
+
+    # STAC search (60s, 3 concurrent)
+    if command_exists curl && command_exists hey; then
+        log_info "Testing STAC search (60s, 3c)"
+        hey -z 60s -c 3 -m POST -H "Content-Type: application/json" -d '{"limit":10}' "$base_url/stac/search" 2>/dev/null | \
+            grep -E "(Total:|Requests/sec:|Average:|Status code)"
+    fi
+
+    # Health checks
+    test_endpoint "$base_url/raster/healthz"
+    test_endpoint "$base_url/vector/healthz"
+
+    log_success "Baseline load test completed"
 }
 
 load_services() {
     log_info "Running service-specific load tests..."
     # TODO: Implement individual service testing
 }
 
+# Autoscaling scenario: 10 minutes of POST /stac/search load via hey while
+# logging HPA state every 30s. Returns 1 if the cluster, namespace, hey, HPAs,
+# metrics-server, base URL or a stac/raster/vector deployment is unavailable.
+load_autoscaling() {
+    local base_url service load_pid i
+    log_info "Running autoscaling tests..."
+    validate_cluster || return 1
+    validate_namespace "$NAMESPACE" || return 1
+
+    # Check the load generator before any slow cluster work
+    if ! command_exists hey; then
+        log_error "hey required for autoscaling tests"
+        return 1
+    fi
+
+    # Check HPA exists (a failing kubectl also yields empty output)
+    if [[ -z "$(kubectl get hpa -n "$NAMESPACE" --no-headers 2>/dev/null)" ]]; then
+        log_error "No HPA resources found. Deploy with autoscaling enabled."
+        return 1
+    fi
+
+    # Check metrics server
+    if ! kubectl get deployment -A | grep -q metrics-server; then
+        log_error "metrics-server required for autoscaling tests"
+        return 1
+    fi
+
+    if ! base_url=$(get_base_url); then
+        log_error "Cannot reach eoAPI endpoints"
+        return 1
+    fi
+    log_info "Using base URL: $base_url"
+
+    # Wait for services
+    for service in stac raster vector; do
+        kubectl wait --for=condition=Available deployment/"${RELEASE_NAME}-${service}" -n "$NAMESPACE" --timeout=90s || return 1
+    done
+
+    log_info "Current HPA status:"
+    kubectl get hpa -n "$NAMESPACE"
+
+    # Load that should trigger HPA (10 min, 15c). URL goes last: options placed after it are not parsed by hey.
+    log_info "Generating sustained load to trigger autoscaling..."
+    log_info "Starting sustained load test (10 minutes)..."
+    hey -z 600s -c 15 -m POST -H "Content-Type: application/json" \
+        -d '{"limit":100}' "$base_url/stac/search" &
+    load_pid=$!
+    # Fields are counted from the right (MAXPODS REPLICAS AGE): TARGETS may contain spaces.
+    log_info "Monitoring HPA scaling..."
+    for i in {1..20}; do
+        sleep 30
+        log_info "HPA status after ${i}x30s:"
+        kubectl get hpa -n "$NAMESPACE" --no-headers | awk '{ t = $3; for (f = 4; f <= NF - 4; f++) t = t " " $f
+            print $1 ": " $(NF-1) "/" $(NF-2) " replicas, CPU: " t }'
+    done
+    # Stop load test (harmless if hey has already finished)
+    kill "$load_pid" 2>/dev/null || true
+    wait "$load_pid" 2>/dev/null || true
+    log_info "Final HPA status:"
+    kubectl get hpa -n "$NAMESPACE"
+    log_success "Autoscaling test completed"
+}
+
 load_mixed() {
     log_info "Running mixed load test scenario..."
     # TODO: Implement realistic mixed scenario
@@ -83,6 +230,7 @@ load_all() {
 
     load_baseline || ((failed++))
     load_services || ((failed++))
+    load_autoscaling || ((failed++))
     load_mixed || ((failed++))
     load_stress || ((failed++))
     load_soak || ((failed++))
@@ -119,7 +267,7 @@ main() {
                 RELEASE_NAME="$2"
                 shift 2
                 ;;
-            baseline|services|mixed|stress|soak|chaos|all)
+            baseline|services|autoscaling|mixed|stress|soak|chaos|all)
                 command="$1"
                 shift
                 break
@@ -141,6 +289,9 @@ main() {
         services)
             load_services
             ;;
+        autoscaling)
+            load_autoscaling
+            ;;
         mixed)
             load_mixed
             ;;
diff --git a/scripts/test.sh b/scripts/test.sh
@@ -27,7 +27,6 @@ COMMANDS:
     unit            Run Helm unit tests
     integration     Run integration tests with pytest
     notification    Run notification tests with database access
-    autoscaling     Run autoscaling tests with pytest
     all             Run all tests
 
 OPTIONS:
@@ -50,9 +49,6 @@ EXAMPLES:
     # Run integration tests with debug
     $(basename "$0") integration --debug
 
-    # Run autoscaling tests with debug
-    $(basename "$0") autoscaling --debug
-
     # Run all tests
     $(basename "$0") all
 EOF
@@ -123,13 +119,7 @@ test_integration() {
     "${SCRIPT_DIR}/test/integration.sh" "$pytest_args"
 }
 
-test_autoscaling() {
-    local pytest_args="${1:-}"
-    export NAMESPACE="$NAMESPACE"
-    export RELEASE_NAME="$RELEASE_NAME"
-    export DEBUG_MODE="$DEBUG_MODE"
-    "${SCRIPT_DIR}/test/autoscaling.sh" "$pytest_args"
-}
+
 
 test_notification() {
     local pytest_args="${1:-}"
@@ -150,7 +140,6 @@ test_all() {
 
     if validate_cluster 2>/dev/null; then
         test_integration || ((failed++))
-        test_autoscaling || ((failed++))
         test_notification || ((failed++))
     else
         log_warn "Skipping integration tests - no cluster connection"
@@ -192,7 +181,7 @@ main() {
                 pytest_args="$2"
                 shift 2
                 ;;
-            schema|lint|unit|notification|integration|autoscaling|all)
+            schema|lint|unit|notification|integration|all)
                 command="$1"
                 shift
                 break
@@ -223,9 +212,6 @@ main() {
         notification)
             test_notification "$pytest_args"
             ;;
-        autoscaling)
-            test_autoscaling "$pytest_args"
-            ;;
         all)
             test_all
             ;;