@@ -7,6 +7,8 @@
   # Service account with S3 and STAC API permissions
   serviceAccountName: operate-workflow-sa
   entrypoint: main
+  # Disable log archival - logs visible directly in UI without S3 archival delay
+  archiveLogs: false
   # Clean up completed workflows after 24 hours
   ttlStrategy:
     secondsAfterCompletion: 86400  # 24 hours
@@ -59,116 +61,88 @@ spec:
 
     - name: show-parameters
       activeDeadlineSeconds: 60
-      script:
+      container:
         image: ghcr.io/eopf-explorer/data-pipeline:{{workflow.parameters.pipeline_image_version}}
         imagePullPolicy: Always
-        command: [bash]
-        source: |
-          cat <<'EOF'
-          ╔════════════════════════════════════════════════════════════════════════════╗
-          ║                          GEOZARR PIPELINE EXECUTION                         ║
-          ╚════════════════════════════════════════════════════════════════════════════╝
-
-          📋 WORKFLOW PARAMETERS:
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-          🎯 ITEM DETAILS:
-            • Item ID: {{workflow.parameters.item_id}}
-            • Source URL: {{workflow.parameters.source_url}}
-            • Collection: {{workflow.parameters.register_collection}}
-
-          🌐 API ENDPOINTS:
-            • STAC API: {{workflow.parameters.stac_api_url}}
-            • Raster API: {{workflow.parameters.raster_api_url}}
-
-          ☁️ S3 CONFIGURATION:
-            • Endpoint: {{workflow.parameters.s3_endpoint}}
-            • Bucket: {{workflow.parameters.s3_output_bucket}}
-            • Prefix: {{workflow.parameters.s3_output_prefix}}
-
-          🐳 IMAGE VERSION:
-            • Pipeline: {{workflow.parameters.pipeline_image_version}}
-
-          📦 OUTPUT PATH:
-            s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/{{workflow.parameters.register_collection}}/{{workflow.parameters.item_id}}.zarr
-
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-          ⏱️ Started: $(date -u +"%Y-%m-%d %H:%M:%S UTC")
-          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-          EOF
-        env:
-          - name: PYTHONUNBUFFERED
-            value: "1"
+        command: ["/bin/sh"]
+        args:
+          - -c
+          - |
+            echo "=== Workflow Parameters ==="
+            echo "{{workflow.parameters}}"
 
     - name: convert-geozarr
       activeDeadlineSeconds: 3600  # 1 hour timeout
-      script:
+      container:
         image: ghcr.io/eopf-explorer/data-pipeline:{{workflow.parameters.pipeline_image_version}}
         imagePullPolicy: Always
-        command: [bash]
-        source: |
-          set -euo pipefail
+        command: [bash, -c]
+        args:
+          - |
+            set -euo pipefail
 
-          echo "════════════════════════════════════════════════════════════════════════════"
-          echo " STEP 1/4: GEOZARR CONVERSION"
-          echo "════════════════════════════════════════════════════════════════════════════"
-          echo ""
+            echo "════════════════════════════════════════════════════════════════════════════"
+            echo " STEP 1/4: GEOZARR CONVERSION"
+            echo "════════════════════════════════════════════════════════════════════════════"
+            echo ""
 
-          SOURCE_URL="{{workflow.parameters.source_url}}"
-          COLLECTION="{{workflow.parameters.register_collection}}"
-          OUTPUT_PATH="s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/$COLLECTION/{{workflow.parameters.item_id}}.zarr"
+            SOURCE_URL="{{workflow.parameters.source_url}}"
+            COLLECTION="{{workflow.parameters.register_collection}}"
+            OUTPUT_PATH="s3://{{workflow.parameters.s3_output_bucket}}/{{workflow.parameters.s3_output_prefix}}/$COLLECTION/{{workflow.parameters.item_id}}.zarr"
 
-          echo "🔍 [1/6] Resolving source..."
-          # Check if source is STAC item or direct zarr
-          if [[ "$SOURCE_URL" == *"/items/"* ]]; then
-            echo "📡 Extracting Zarr URL from STAC item..."
-            ZARR_URL=$(python3 /app/scripts/get_zarr_url.py "$SOURCE_URL")
-            echo "✅ Zarr URL: $ZARR_URL"
-          else
-            ZARR_URL="$SOURCE_URL"
-            echo "✅ Direct Zarr URL: $ZARR_URL"
-          fi
-          echo ""
+            echo "🔍 [1/6] Resolving source..."
+            # Check if source is STAC item or direct zarr
+            if [[ "$SOURCE_URL" == *"/items/"* ]]; then
+              echo "📡 Extracting Zarr URL from STAC item..."
+              ZARR_URL=$(python3 /app/scripts/get_zarr_url.py "$SOURCE_URL")
+              echo "✅ Zarr URL: $ZARR_URL"
+            else
+              ZARR_URL="$SOURCE_URL"
+              echo "✅ Direct Zarr URL: $ZARR_URL"
+            fi
+            echo ""
 
-          echo "⚙️ [2/6] Getting conversion parameters for $COLLECTION..."
-          eval $(python3 /app/scripts/get_conversion_params.py --collection "$COLLECTION")
-          echo " Groups: $ZARR_GROUPS"
-          echo " Chunk: $CHUNK"
-          echo " Tile width: $TILE_WIDTH"
-          echo " Extra flags: $EXTRA_FLAGS"
-          echo ""
+            echo "⚙️ [2/6] Getting conversion parameters for $COLLECTION..."
+            eval $(python3 /app/scripts/get_conversion_params.py --collection "$COLLECTION")
+            echo " Groups: $ZARR_GROUPS"
+            echo " Chunk: $CHUNK"
+            echo " Tile width: $TILE_WIDTH"
+            echo " Extra flags: $EXTRA_FLAGS"
+            echo ""
 
-          echo "🧹 [3/6] Cleaning up existing output..."
-          if [ -f /app/scripts/cleanup_s3_path.py ]; then
-            python3 /app/scripts/cleanup_s3_path.py "$OUTPUT_PATH" || echo "⚠️ Cleanup failed (may not exist yet)"
-          else
-            echo "ℹ️ Skipping cleanup (script not available)"
-          fi
-          echo ""
+            echo "🧹 [3/6] Cleaning up existing output..."
+            if [ -f /app/scripts/cleanup_s3_path.py ]; then
+              python3 /app/scripts/cleanup_s3_path.py "$OUTPUT_PATH" || echo "⚠️ Cleanup failed (may not exist yet)"
+            else
+              echo "ℹ️ Skipping cleanup (script not available)"
+            fi
+            echo ""
 
-          echo "🚀 [4/6] Starting GeoZarr conversion..."
-          echo " Source: $ZARR_URL"
-          echo " Destination: $OUTPUT_PATH"
-          echo " Collection: $COLLECTION"
-          echo ""
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo " CONVERSION LOGS (this may take 10-30 minutes for large datasets)"
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo ""
+            echo "🚀 [4/6] Starting GeoZarr conversion..."
+            echo " Source: $ZARR_URL"
+            echo " Destination: $OUTPUT_PATH"
+            echo " Collection: $COLLECTION"
+            echo ""
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo " CONVERSION LOGS (parallel processing with local Dask cluster)"
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo ""
 
-          # Build conversion command with Dask for parallel processing
-          eopf-geozarr convert "$ZARR_URL" "$OUTPUT_PATH" \
-            --groups "$ZARR_GROUPS" \
-            $EXTRA_FLAGS \
-            --spatial-chunk $CHUNK \
-            --tile-width $TILE_WIDTH \
-            --dask-cluster \
-            --verbose
+            # Build conversion command with parallel processing
+            # - Enable local Dask cluster for parallel chunk processing
+            # - Higher CPU/memory resources support multiple Dask workers
+            eopf-geozarr convert "$ZARR_URL" "$OUTPUT_PATH" \
+              --groups "$ZARR_GROUPS" \
+              $EXTRA_FLAGS \
+              --spatial-chunk $CHUNK \
+              --tile-width $TILE_WIDTH \
+              --dask-cluster \
+              --verbose
 
-          echo ""
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-          echo "✅ [6/6] Conversion completed successfully!"
-          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo ""
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+            echo "✅ [6/6] Conversion completed successfully!"
+            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
         env:
           - name: PYTHONUNBUFFERED
             value: "1"
@@ -186,10 +160,10 @@ spec:
             value: "{{workflow.parameters.s3_endpoint}}"
         resources:
           requests:
-            memory: "8Gi"
-            cpu: "1"
+            memory: "6Gi"
+            cpu: "2"
           limits:
-            memory: "16Gi"
+            memory: "10Gi"
             cpu: "4"
 
     - name: validate