Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4f9d9a2
Fix naming for podlogs
sfc-gh-jmckulka Nov 19, 2025
7327b58
more checks and output
sfc-gh-jmckulka Nov 19, 2025
0c34eb1
remove unnecessary images
sfc-gh-jmckulka Nov 19, 2025
e0f9a1c
use variable in template
sfc-gh-jmckulka Nov 20, 2025
f9a2253
add more logging
sfc-gh-jmckulka Nov 20, 2025
eed9964
Increase timeout to 5m for backup/restore operations
sfc-gh-jmckulka Nov 21, 2025
a531ce8
Increase timeout for image prefetch
sfc-gh-jmckulka Nov 21, 2025
910f94e
Wait for postgres to be ready
sfc-gh-jmckulka Nov 21, 2025
e7906ee
Add Postgres readiness checks before psql execs
sfc-gh-jmckulka Nov 21, 2025
0645472
Add storage provisioning diagnostics for CI failures
sfc-gh-jmckulka Nov 21, 2025
872e8ad
test: reduce disk usage in pgbackrest-restore chainsaw test
sfc-gh-jmckulka Nov 21, 2025
136aa04
test: fix shell syntax error in clone-cluster template
sfc-gh-jmckulka Nov 21, 2025
c6fee50
test: add timeout to PVC deletion wait scripts
sfc-gh-jmckulka Nov 21, 2025
0566e11
test: remove unnecessary image prefetch for chainsaw tests
sfc-gh-jmckulka Nov 21, 2025
c257229
Split e2e-k3d into separate chainsaw and kuttl jobs
sfc-gh-jmckulka Nov 21, 2025
569abc5
Fixup: remove extra debug
sfc-gh-jmckulka Nov 21, 2025
aa19cad
test: add missing image prefetch and increase KUTTL timeout
sfc-gh-jmckulka Nov 21, 2025
6cfdeeb
Remove some debug logging
sfc-gh-jmckulka Nov 24, 2025
e8b0ac4
Revert timeout bump
sfc-gh-jmckulka Nov 24, 2025
a2d12f5
Revert foreground deletion of pvc
sfc-gh-jmckulka Nov 24, 2025
bd01f63
Restore background deletion policy for clone clusters
sfc-gh-jmckulka Nov 24, 2025
51f6ea0
Remove more prefetch images from chainsaw action
sfc-gh-jmckulka Nov 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/k3d/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ inputs:
description: >
Each line is the name of an image to fetch onto all Kubernetes nodes
prefetch-timeout:
default: 90s
default: 3m
required: true
description: >
Amount of time to wait for images to be fetched
Expand Down
81 changes: 58 additions & 23 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
path: envtest-existing.coverage.gz
retention-days: 1

e2e-k3d:
e2e-k3d-chainsaw:
runs-on: ubuntu-24.04
needs: [go-test]
strategy:
Expand All @@ -99,18 +99,64 @@ jobs:
k3s-channel: "${{ matrix.kubernetes }}"
prefetch-images: |
registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2542
registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2542

- name: Get pgMonitor files.
run: make get-pgmonitor
env:
PGMONITOR_DIR: "${{ github.workspace }}/hack/tools/pgmonitor"
QUERIES_CONFIG_DIR: "${{ github.workspace }}/hack/tools/queries"

# Start a Docker container with the working directory mounted.
- run: make build BUILDAH=docker
- name: Start PGO
run: |
kubectl apply --server-side -k ./config/namespace
kubectl apply --server-side -k ./config/dev
hack/create-kubeconfig.sh postgres-operator pgo
docker run --detach --network host --read-only \
--volume "$(pwd):/mnt" --workdir '/mnt' \
--env 'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \
--env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \
--env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2542' \
--env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2542' \
--env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \
--name 'postgres-operator' localhost/postgres-operator

- run: |
make check-chainsaw && exit
failed=$?
echo '::group::PGO logs'; docker logs 'postgres-operator'; echo '::endgroup::'
exit $failed

- name: Stop PGO
run: docker stop 'postgres-operator' || true

e2e-k3d-kuttl:
runs-on: ubuntu-24.04
needs: [go-test]
strategy:
fail-fast: false
matrix:
kubernetes: [v1.30, v1.33]
steps:
- uses: actions/checkout@v5
- uses: actions/setup-go@v6
with: { go-version: stable }

- name: Start k3s
uses: ./.github/actions/k3d
with:
k3s-channel: "${{ matrix.kubernetes }}"
prefetch-timeout: 5m
prefetch-images: |
registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.10-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.3-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.4-2542
registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2542
registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.0-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.4-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.5-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.6-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-18.0-2542
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-18.0-3.6-2542
registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2542

- name: Get pgMonitor files.
run: make get-pgmonitor
Expand All @@ -134,25 +180,12 @@ jobs:
--env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.17.1-2542' \
--env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.0-2542' \
--env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.10-2542' \
--env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.3-2542' \
--env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.10-3.4-2542' \
--env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.6-2542' \
--env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.4-2542' \
--env 'RELATED_IMAGE_POSTGRES_17_GIS_3.5=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.5-2542' \
--env 'RELATED_IMAGE_POSTGRES_17_GIS_3.6=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.6-3.6-2542' \
--env 'RELATED_IMAGE_POSTGRES_18=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-18.0-2542' \
--env 'RELATED_IMAGE_POSTGRES_18_GIS_3.6=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-18.0-3.6-2542' \
--env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2542' \
--env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.4-0' \
--env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \
--name 'postgres-operator' localhost/postgres-operator

- run: |
make check-chainsaw && exit
failed=$?
echo '::group::PGO logs'; docker logs 'postgres-operator'; echo '::endgroup::'
exit $failed

- run: make generate-kuttl
env:
KUTTL_PG_UPGRADE_FROM_VERSION: '16'
Expand All @@ -175,6 +208,8 @@ jobs:
needs:
- kubernetes-api
- kubernetes-k3d
- e2e-k3d-chainsaw
- e2e-k3d-kuttl
steps:
- uses: actions/checkout@v5
- uses: actions/setup-go@v6
Expand Down
2 changes: 1 addition & 1 deletion testing/chainsaw/e2e/pgbackrest-restore/chainsaw-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
connect: { name: PGCONNECT_TIMEOUT, value: '5' }

- name: volume
value: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } }
value: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 256Mi } } }

- name: postgrescluster
value:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ spec:
postgres-operator.crunchydata.com/cluster=original,
postgres-operator.crunchydata.com/role=master

-
description: >
Wait for PostgreSQL to be ready
script:
skipCommandOutput: true
timeout: 2m
env:
- name: PRIMARY
value: ($primary)
content: |
until kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql -qAt --command 'SELECT 1' 2>/dev/null; do
sleep 1
done

-
description: >
Read the timestamp at which PostgreSQL last started
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ spec:
description: >
Wait for the cluster to come online
assert:
timeout: 5m
resource:
apiVersion: ($postgrescluster.apiVersion)
kind: PostgresCluster
Expand All @@ -51,16 +52,40 @@ spec:
updatedReplicas: 1

catch:
- description: Describe the PostgresCluster to see its status
describe:
apiVersion: ($postgrescluster.apiVersion)
kind: PostgresCluster
name: ($name)

- description: Get all pods in the namespace to see what's running
describe:
apiVersion: v1
kind: Pod
selector: (join('', ['postgres-operator.crunchydata.com/cluster=', $name]))

- description: Get all jobs to check restore status
describe:
apiVersion: batch/v1
kind: Job
selector: (join('', ['postgres-operator.crunchydata.com/cluster=', $name]))

- description: Get events related to the cluster
script:
content: kubectl get events --field-selector (join('', ['involvedObject.name=', $name])) -o wide

- description: Read all log lines from job pods
podLogs:
selector: >
batch.kubernetes.io/job-name,
postgres-operator.crunchydata.com/cluster in (clone-one)
selector: (join('', ['batch.kubernetes.io/job-name,', 'postgres-operator.crunchydata.com/cluster=', $name]))
tail: -1

- description: Read all log lines from postgres pods
podLogs:
selector: >
postgres-operator.crunchydata.com/instance,
postgres-operator.crunchydata.com/cluster in (clone-one)
selector: (join('', ['postgres-operator.crunchydata.com/instance,', 'postgres-operator.crunchydata.com/cluster=', $name]))
tail: -1

- description: Check PVCs for the cluster
describe:
apiVersion: v1
kind: PersistentVolumeClaim
selector: (join('', ['postgres-operator.crunchydata.com/cluster=', $name]))
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ spec:
description: >
Wait for the backup to complete
assert:
timeout: 5m
resource:
apiVersion: batch/v1
kind: Job
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,28 @@ spec:
- name: repo1
volume:
volumeClaimSpec: ($volume)
-
description: >
Wait for the cluster to come online
assert:
resource:
apiVersion: v1
kind: Pod
metadata:
labels:
postgres-operator.crunchydata.com/cluster: original
postgres-operator.crunchydata.com/data: postgres
status:
phase: Running
(containerStatuses[?name == 'database']):
- name: database
ready: true

-
description: >
Wait for the replica backup to complete
assert:
timeout: 5m
resource:
apiVersion: ($postgrescluster.apiVersion)
kind: PostgresCluster
Expand All @@ -64,6 +81,12 @@ spec:
postgres-operator.crunchydata.com/role=master'
)

# Wait for PostgreSQL to be ready
until kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql -qAt --command 'SELECT 1' 2>/dev/null; do
sleep 1
done

kubectl exec --stdin --namespace "${NAMESPACE}" "${PRIMARY}" -- psql -q --file=- <<'SQL'
CREATE TABLESPACE barn LOCATION '/tablespaces/barn/data';
GRANT ALL ON TABLESPACE barn TO public;
Expand All @@ -73,3 +96,7 @@ spec:
- podLogs:
selector: postgres-operator.crunchydata.com/cluster in (original)
tail: 50
- describe:
apiVersion: v1
kind: Pod
selector: postgres-operator.crunchydata.com/cluster in (original)
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ spec:
- name: PRIMARY
value: ($primary)
content: |
# Wait for PostgreSQL to be ready
until kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql -qAt --command 'SELECT 1' 2>/dev/null; do
sleep 1
done

OBJECTIVE=$(
kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql -qAt --command 'SELECT clock_timestamp()'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ spec:
description: >
Wait for the restore to complete and the cluster to come online
assert:
timeout: 5m
resource:
apiVersion: ($postgrescluster.apiVersion)
kind: PostgresCluster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ spec:
postgres-operator.crunchydata.com/role=master'
)

# Wait for PostgreSQL to be ready
until kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql -qAt --command 'SELECT 1' 2>/dev/null; do
sleep 1
done

kubectl exec --namespace "${NAMESPACE}" "${PRIMARY}" \
-- psql --command 'SELECT pg_switch_wal()' --pset footer=off

Expand Down
Loading