Commit cbab8cc

perf(workflow): optimize template for better observability
- Set archiveLogs: false for immediate log visibility via kubectl
- Change convert-geozarr from script to container template for stdout logs
- Reduce memory request to 6Gi (limit 10Gi) for better cluster scheduling
- Add Dask parallel processing info in comments
- Simplify show-parameters to basic output

Fixes 30-60s log delay in Argo UI. Logs now visible via kubectl immediately.
1 parent f1eae63 commit cbab8cc
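
To sanity-check the "logs now visible via kubectl" claim, the step output can be tailed straight from the workflow pod; Argo runs the user step in a container named "main" (the pod name and namespace below are illustrative placeholders):

    # Follow the converter's stdout live from the pod (names are placeholders)
    kubectl logs -f <workflow-pod-name> -c main -n <namespace>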

File tree

1 file changed (+72 / -98 lines)

workflows/template.yaml

Lines changed: 72 additions & 98 deletions
@@ -7,6 +7,8 @@ spec:
   # Service account with S3 and STAC API permissions
   serviceAccountName: operate-workflow-sa
   entrypoint: main
+  # Disable log archival - logs visible directly in UI without S3 archival delay
+  archiveLogs: false
   # Clean up completed workflows after 24 hours
   ttlStrategy:
     secondsAfterCompletion: 86400 # 24 hours
@@ -59,116 +61,88 @@ spec:
 
     - name: show-parameters
       activeDeadlineSeconds: 60
-      script:
+      container:
         image: ghcr.io/eopf-explorer/data-pipeline:{{workflow.parameters.pipeline_image_version}}
         imagePullPolicy: Always
-        command: [bash]
-        source: |
-          cat <<'EOF'
-          ╔════════════════════════════════════════════════════════════════════════════╗
-          ║                         GEOZARR PIPELINE EXECUTION                          ║
-          ╚════════════════════════════════════════════════════════════════════════════╝
-
-          📋 WORKFLOW PARAMETERS:
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-          🎯 ITEM DETAILS:
-            • Item ID: {{workflow.parameters.item_id}}
-            • Source URL: {{workflow.parameters.source_url}}
-            • Collection: {{workflow.parameters.register_collection}}
-
-          🌐 API ENDPOINTS:
-            • STAC API: {{workflow.parameters.stac_api_url}}
-            • Raster API: {{workflow.parameters.raster_api_url}}
-
-          ☁️ S3 CONFIGURATION:
-            • Endpoint: {{workflow.parameters.s3_endpoint}}
-            • Bucket: {{workflow.parameters.s3_output_bucket}}
-            • Prefix: {{workflow.parameters.s3_output_prefix}}
-
-          🐳 IMAGE VERSION:
-            • Pipeline: {{workflow.parameters.pipeline_image_version}}
-
-          📦 OUTPUT PATH:
-            s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/{{workflow.parameters.register_collection}}/{{workflow.parameters.item_id}}.zarr
-
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-          ⏱️ Started: $(date -u +"%Y-%m-%d %H:%M:%S UTC")
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-          EOF
-        env:
-          - name: PYTHONUNBUFFERED
-            value: "1"
+        command: ["/bin/sh"]
+        args:
+          - -c
+          - |
+            echo "=== Workflow Parameters ==="
+            echo "{{workflow.parameters}}"
 
     - name: convert-geozarr
       activeDeadlineSeconds: 3600 # 1 hour timeout
-      script:
+      container:
         image: ghcr.io/eopf-explorer/data-pipeline:{{workflow.parameters.pipeline_image_version}}
         imagePullPolicy: Always
-        command: [bash]
-        source: |
-          set -euo pipefail
+        command: [bash, -c]
+        args:
+          - |
+            set -euo pipefail
 
-          echo "════════════════════════════════════════════════════════════════════════════"
-          echo "  STEP 1/4: GEOZARR CONVERSION"
-          echo "════════════════════════════════════════════════════════════════════════════"
-          echo ""
+            echo "════════════════════════════════════════════════════════════════════════════"
+            echo "  STEP 1/4: GEOZARR CONVERSION"
+            echo "════════════════════════════════════════════════════════════════════════════"
+            echo ""
 
-          SOURCE_URL="{{workflow.parameters.source_url}}"
-          COLLECTION="{{workflow.parameters.register_collection}}"
-          OUTPUT_PATH="s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/$COLLECTION/{{workflow.parameters.item_id}}.zarr"
+            SOURCE_URL="{{workflow.parameters.source_url}}"
+            COLLECTION="{{workflow.parameters.register_collection}}"
+            OUTPUT_PATH="s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/$COLLECTION/{{workflow.parameters.item_id}}.zarr"
 
-          echo "🔍 [1/6] Resolving source..."
-          # Check if source is STAC item or direct zarr
-          if [[ "$SOURCE_URL" == *"/items/"* ]]; then
-            echo "📡 Extracting Zarr URL from STAC item..."
-            ZARR_URL=$(python3 /app/scripts/get_zarr_url.py "$SOURCE_URL")
-            echo "✅ Zarr URL: $ZARR_URL"
-          else
-            ZARR_URL="$SOURCE_URL"
-            echo "✅ Direct Zarr URL: $ZARR_URL"
-          fi
-          echo ""
+            echo "🔍 [1/6] Resolving source..."
+            # Check if source is STAC item or direct zarr
+            if [[ "$SOURCE_URL" == *"/items/"* ]]; then
+              echo "📡 Extracting Zarr URL from STAC item..."
+              ZARR_URL=$(python3 /app/scripts/get_zarr_url.py "$SOURCE_URL")
+              echo "✅ Zarr URL: $ZARR_URL"
+            else
+              ZARR_URL="$SOURCE_URL"
+              echo "✅ Direct Zarr URL: $ZARR_URL"
+            fi
+            echo ""
 
-          echo "⚙️ [2/6] Getting conversion parameters for $COLLECTION..."
-          eval $(python3 /app/scripts/get_conversion_params.py --collection "$COLLECTION")
-          echo "   Groups: $ZARR_GROUPS"
-          echo "   Chunk: $CHUNK"
-          echo "   Tile width: $TILE_WIDTH"
-          echo "   Extra flags: $EXTRA_FLAGS"
-          echo ""
+            echo "⚙️ [2/6] Getting conversion parameters for $COLLECTION..."
+            eval $(python3 /app/scripts/get_conversion_params.py --collection "$COLLECTION")
+            echo "   Groups: $ZARR_GROUPS"
+            echo "   Chunk: $CHUNK"
+            echo "   Tile width: $TILE_WIDTH"
+            echo "   Extra flags: $EXTRA_FLAGS"
+            echo ""
 
-          echo "🧹 [3/6] Cleaning up existing output..."
-          if [ -f /app/scripts/cleanup_s3_path.py ]; then
-            python3 /app/scripts/cleanup_s3_path.py "$OUTPUT_PATH" || echo "⚠️ Cleanup failed (may not exist yet)"
-          else
-            echo "ℹ️ Skipping cleanup (script not available)"
-          fi
-          echo ""
+            echo "🧹 [3/6] Cleaning up existing output..."
+            if [ -f /app/scripts/cleanup_s3_path.py ]; then
+              python3 /app/scripts/cleanup_s3_path.py "$OUTPUT_PATH" || echo "⚠️ Cleanup failed (may not exist yet)"
+            else
+              echo "ℹ️ Skipping cleanup (script not available)"
+            fi
+            echo ""
 
-          echo "🚀 [4/6] Starting GeoZarr conversion..."
-          echo "   Source: $ZARR_URL"
-          echo "   Destination: $OUTPUT_PATH"
-          echo "   Collection: $COLLECTION"
-          echo ""
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo "  CONVERSION LOGS (this may take 10-30 minutes for large datasets)"
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo ""
+            echo "🚀 [4/6] Starting GeoZarr conversion..."
+            echo "   Source: $ZARR_URL"
+            echo "   Destination: $OUTPUT_PATH"
+            echo "   Collection: $COLLECTION"
+            echo ""
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo "  CONVERSION LOGS (parallel processing with local Dask cluster)"
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo ""
 
-          # Build conversion command with Dask for parallel processing
-          eopf-geozarr convert "$ZARR_URL" "$OUTPUT_PATH" \
-            --groups "$ZARR_GROUPS" \
-            $EXTRA_FLAGS \
-            --spatial-chunk $CHUNK \
-            --tile-width $TILE_WIDTH \
-            --dask-cluster \
-            --verbose
+            # Build conversion command with parallel processing
+            # - Enable local Dask cluster for parallel chunk processing
+            # - Higher CPU/memory resources support multiple Dask workers
+            eopf-geozarr convert "$ZARR_URL" "$OUTPUT_PATH" \
+              --groups "$ZARR_GROUPS" \
+              $EXTRA_FLAGS \
+              --spatial-chunk $CHUNK \
+              --tile-width $TILE_WIDTH \
+              --dask-cluster \
+              --verbose
 
-          echo ""
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo "✅ [6/6] Conversion completed successfully!"
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo ""
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo "✅ [6/6] Conversion completed successfully!"
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
         env:
          - name: PYTHONUNBUFFERED
            value: "1"
@@ -186,10 +160,10 @@ spec:
            value: "{{workflow.parameters.s3_endpoint}}"
        resources:
          requests:
-           memory: "8Gi"
-           cpu: "1"
+           memory: "6Gi"
+           cpu: "2"
          limits:
-           memory: "16Gi"
+           memory: "10Gi"
            cpu: "4"
 
     - name: validate
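
For reference, a minimal sketch of kicking off a run from this template with the Argo CLI and streaming its logs; the WorkflowTemplate name and parameter values below are assumptions, while the parameter names match those used in the template above:

    # Submit from the WorkflowTemplate and follow logs as they are produced
    # (template name and values are placeholders)
    argo submit --from workflowtemplate/<template-name> \
      -p source_url="<stac-item-or-zarr-url>" \
      -p item_id="<item-id>" \
      -p register_collection="<collection>" \
      --log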
