Commit 6de37e1
feat: add workflow parallelism controls and benchmarking tools
- Configure workflow parallelism (limit: 10), mutex locks, and timeouts
- Add idempotency and retry logic to convert/register scripts
- Create pipeline_utils module for shared logging and metrics
- Add baseline benchmarking and load-testing tools
- Include unit and integration tests with CI validation
- Update workflow template with production resource limits
1 parent 8198ca6 commit 6de37e1

19 files changed: +1649 -617 lines
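The convert/register retry and idempotency changes themselves are not part of the excerpt below. As a rough sketch of the pattern the commit message describes (the `with_retries` name, its parameters, and the backoff policy are hypothetical illustrations, not code from this commit):

```python
import logging
import time

logger = logging.getLogger("pipeline")

def with_retries(fn, *, attempts: int = 3, backoff_s: float = 2.0):
    """Call fn(), retrying failed attempts with exponential backoff.

    Hypothetical sketch; re-raises the last exception once all
    attempts are exhausted.
    """
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception as exc:
            if attempt == attempts:
                raise
            delay = backoff_s * 2 ** (attempt - 1)
            logger.warning("attempt %d/%d failed (%s); retrying in %.1fs",
                           attempt, attempts, exc, delay)
            time.sleep(delay)
```

Idempotency would pair with this by checking for an existing output (an already-converted Zarr store or an already-registered STAC item, say) before doing any work, so a retried or re-delivered event becomes a no-op.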
Lines changed: 47 additions & 0 deletions

```yaml
name: Workflow Parallelism Smoke Test

on:
  pull_request:
    paths:
      - 'workflows/**'
      - 'scripts/convert.py'
      - 'scripts/register.py'
  push:
    branches:
      - feat/workflow-parallelism

jobs:
  smoke-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install uv
          uv sync

      - name: Validate workflow template YAML
        run: |
          python -c "import yaml; yaml.safe_load(open('workflows/base/workflowtemplate.yaml'))"

      - name: Test convert.py imports
        run: |
          uv run python -c "from scripts.convert import run_conversion; print('✓ convert.py imports OK')"

      - name: Test register.py imports
        run: |
          uv run python -c "from scripts.register import run_registration; print('✓ register.py imports OK')"

      - name: Validate kustomize overlays
        run: |
          if command -v kustomize &> /dev/null; then
            kustomize build workflows/overlays/high-throughput
          else
            echo "Kustomize not available; skipping validation"
          fi
```

.gitignore

Lines changed: 2 additions & 0 deletions

```diff
@@ -61,3 +61,5 @@ Thumbs.db
 # Project-specific
 *.zarr
 out/
+reports/*
+!reports/README.md
```

Makefile

Lines changed: 18 additions & 0 deletions

```diff
@@ -49,3 +49,21 @@ clean: ## Clean generated files and caches
 	find . -type f -name '*.pyc' -delete 2>/dev/null || true
 	rm -rf .pytest_cache .mypy_cache .ruff_cache htmlcov .coverage
 	@echo "✓ Clean complete"
+
+test: ## Run tests with pytest
+	@echo "🧪 Running tests..."
+	uv run pytest tests/ -v
+
+validate-workflows: ## Validate workflow YAML files
+	@echo "✓ Validating workflow templates..."
+	@python3 -c "import yaml; yaml.safe_load(open('workflows/base/workflowtemplate.yaml'))" && echo "  ✓ workflowtemplate.yaml"
+	@python3 -c "import yaml; yaml.safe_load(open('workflows/base/sensor.yaml'))" && echo "  ✓ sensor.yaml"
+	@python3 -c "import yaml; yaml.safe_load(open('workflows/base/eventsource.yaml'))" && echo "  ✓ eventsource.yaml"
+
+apply-staging: ## Apply staging overlay to devseed-staging
+	@echo "📦 Applying staging overlay..."
+	kubectl apply -k workflows/overlays/staging
+
+apply-production: ## Apply production overlay to devseed
+	@echo "📦 Applying production overlay..."
+	kubectl apply -k workflows/overlays/production
```

pyproject.toml

Lines changed: 2 additions & 1 deletion

```diff
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "data-pipeline"
 version = "1.0.0"
-description = "Minimal event-driven Argo Workflows pipeline for Sentinel-2 GeoZarr conversion and STAC registration"
+description = "Minimal event-driven Argo Workflows pipeline for Sentinel GeoZarr conversion and STAC registration"
 readme = "README.md"
 requires-python = ">=3.13"
 license = { text = "MIT" }
@@ -44,6 +44,7 @@ dev = [
     "mypy>=1.11.0",
     "pre-commit>=3.7.0",
     "types-boto3>=1.0.2",
+    "matplotlib>=3.7.0",
 ]

 [project.urls]
```
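matplotlib joins the dev extras, presumably to render the `baseline-plots.png` analysis output described in `reports/README.md` below. A minimal sketch of such a plot, assuming the capture layout documented there (the values here are illustrative, not real benchmark data):

```python
import os

import matplotlib
matplotlib.use("Agg")  # render headlessly, e.g. in CI
import matplotlib.pyplot as plt

# Illustrative captures following the layout in reports/README.md;
# a real run would load them from a baseline-metrics-*.json file.
captures = [{"workflows": {"running": n}} for n in (0, 4, 9, 10, 10, 6, 1)]
running = [c["workflows"]["running"] for c in captures]

os.makedirs("reports/analysis", exist_ok=True)
plt.plot(running, marker="o")
plt.xlabel("capture index")
plt.ylabel("running workflows")
plt.title("Concurrent workflows over time")
plt.savefig("reports/analysis/baseline-plots.png")
```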

reports/README.md

Lines changed: 54 additions & 0 deletions

# Benchmarking Reports

Performance metrics and analysis for workflow scaling tests.

## Structure

```
reports/
├── baseline/    # Baseline workflow metrics
│   └── baseline-metrics-*.json
└── analysis/    # Statistical analysis outputs
    ├── baseline-stats.json
    └── baseline-plots.png
```

## Metrics Files

Each `baseline-metrics-*.json` file contains:

- **metadata**: Benchmark run details (start/end time, duration, namespace)
- **captures**: Time-series data points, each recording:
  - **workflows**: Argo workflow counts (total, running, succeeded, failed)
  - **nodes**: Kubernetes node resource allocation
  - **rabbitmq**: Queue depths and consumer counts (if available)
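To make that layout concrete, a minimal sketch of reading one metrics file and scanning its captures; only the key names listed above come from this commit, so the exact nesting is an assumption (as is the example filename, which just follows the documented timestamp pattern):

```python
import json

with open("reports/baseline/baseline-metrics-20250101-120000.json") as f:
    metrics = json.load(f)

print("namespace:", metrics["metadata"].get("namespace"))
for capture in metrics["captures"]:
    wf = capture["workflows"]  # assumed nesting: one snapshot per capture
    print(f"running={wf['running']} succeeded={wf['succeeded']} failed={wf['failed']}")
```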
## Analysis Files

Each `baseline-stats.json` file contains:

- **metadata**: Benchmark run details
- **workflow_stats**: Aggregate statistics
  - `peak_concurrent`: Maximum concurrent workflows
  - `total_completed`: Total succeeded workflows
  - `total_failed`: Total failed workflows
  - `avg_concurrent`: Average concurrent workflows
- **rabbitmq_stats**: RabbitMQ statistics (if available)
  - `peak_queue_depth`: Maximum queue depth
  - `avg_queue_depth`: Average queue depth
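As a sketch of how these aggregates could fall out of the captures (assuming the capture layout above; the actual `analyze_baseline.py` implementation is not shown in this excerpt):

```python
def summarize(captures: list[dict]) -> dict:
    """Derive the workflow_stats aggregates from raw captures (sketch)."""
    running = [c["workflows"]["running"] for c in captures]
    # Assumes succeeded/failed counts are cumulative per snapshot,
    # so the final capture carries the totals.
    final = captures[-1]["workflows"]
    return {
        "peak_concurrent": max(running),
        "avg_concurrent": sum(running) / len(running),
        "total_completed": final["succeeded"],
        "total_failed": final["failed"],
    }
```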
## Usage

```bash
# Capture metrics during a workflow burst
python tools/benchmark_baseline.py --duration 1800 --interval 30

# Analyze results
python tools/analyze_baseline.py reports/baseline/baseline-metrics-*.json
```

## Notes

- Metrics files are timestamped: `baseline-metrics-YYYYMMDD-HHMMSS.json`
- The reports directory is excluded from git (see `.gitignore`), except for this README
- Production metrics live in the monitoring system (Prometheus/Grafana)
