From 4c834444dcd531964106d508e600462ee08dc134 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 29 Sep 2025 11:46:47 -0500 Subject: [PATCH 1/3] Check environment before starting Postgres major upgrade This leaves the disk untouched when the upgrade image cannot support the requested upgrade. Issue: PGO-300 See: 406e069c2e038befbcc122a912e692712839b22c --- internal/controller/pgupgrade/jobs.go | 8 +++++++- internal/controller/pgupgrade/jobs_test.go | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index 4715c8da93..be39b4d591 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -49,7 +49,7 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s newVersion := spec.ToPostgresVersion // if the fetch key command is set for TDE, provide the value during initialization - initdb := `/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}"` + initdb := `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"` if fetchKeyCommand != "" { initdb += ` --encryption-key-command "` + fetchKeyCommand + `"` } @@ -80,6 +80,12 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s // Enable nss_wrapper so the current UID and GID resolve to "postgres". // - https://cwrap.org/nss_wrapper.html `export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD`, + `id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`, + + // Expect Postgres executables at the Red Hat paths. + `[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]`, + `[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]`, + `[[ -d /pgdata/pg"${old_version}" ]]`, // Below is the pg_upgrade script used to upgrade a PostgresCluster from // one major version to another. Additional information concerning the diff --git a/internal/controller/pgupgrade/jobs_test.go b/internal/controller/pgupgrade/jobs_test.go index a94641d4c6..5b1f6bc4f8 100644 --- a/internal/controller/pgupgrade/jobs_test.go +++ b/internal/controller/pgupgrade/jobs_test.go @@ -208,11 +208,15 @@ spec: (sed "/^postgres:x:/ d; /^[^:]*:x:${uid}:/ d" /etc/passwd echo "postgres:x:${uid}:${gid%% *}::${data_volume}:") > "${NSS_WRAPPER_PASSWD}" export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD + id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]] + [[ -x /usr/pgsql-"${old_version}"/bin/postgres ]] + [[ -x /usr/pgsql-"${new_version}"/bin/initdb ]] + [[ -d /pgdata/pg"${old_version}" ]] cd /pgdata || exit echo -e "Step 1: Making new pgdata directory...\n" mkdir /pgdata/pg"${new_version}" echo -e "Step 2: Initializing new pgdata directory...\n" - /usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}" + /usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n" chmod 750 /pgdata/pg"${old_version}" echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n" @@ -263,7 +267,7 @@ status: {} tdeJob := reconciler.generateUpgradeJob(ctx, upgrade, startup, "echo testKey") assert.Assert(t, cmp.MarshalContains(tdeJob, - `/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`)) + `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`)) } func TestGenerateRemoveDataJob(t *testing.T) { From 1a321404a9f7d022e497006b7dd86f27e43728e3 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 7 Mar 2025 12:26:09 -0600 Subject: [PATCH 2/3] Simplify upgrade scripts with more variables I find this Go code and resulting Bash easier to read. This also logs more about what is happening without changing the sequence of commands. The script included an unnecessary `|| exit`, so I moved the `set -eu` out of the Bash invocation into the script itself to make that behavior more obvious. --- internal/controller/pgupgrade/jobs.go | 149 ++++++++++----------- internal/controller/pgupgrade/jobs_test.go | 79 +++++------ 2 files changed, 115 insertions(+), 113 deletions(-) diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index be39b4d591..ac71e4a0a6 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -21,11 +21,10 @@ import ( "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) -// Upgrade job - // pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to // upgrade from one major PostgreSQL version to another func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta { @@ -48,20 +47,24 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s oldVersion := spec.FromPostgresVersion newVersion := spec.ToPostgresVersion - // if the fetch key command is set for TDE, provide the value during initialization - initdb := `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"` + var argEncryptionKeyCommand string if fetchKeyCommand != "" { - initdb += ` --encryption-key-command "` + fetchKeyCommand + `"` + argEncryptionKeyCommand = ` --encryption-key-command=` + shell.QuoteWord(fetchKeyCommand) } args := []string{fmt.Sprint(oldVersion), fmt.Sprint(newVersion)} script := strings.Join([]string{ + // Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable. + `shopt -so errexit nounset`, + `declare -r data_volume='/pgdata' old_version="$1" new_version="$2"`, - `printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"`, + `printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"`, + `section() { printf '\n\n%s\n' "$@"; }`, - // Note: Rather than import the nss_wrapper init container, as we do in - // the main postgres-operator, this job does the required nss_wrapper + // NOTE: Rather than import the nss_wrapper init container, as we do in + // the PostgresCluster controller, this job does the required nss_wrapper // settings here. + `section 'Step 1 of 7: Ensuring username is postgres...'`, // Create a copy of the system group definitions, but remove the "postgres" // group or any group with the current GID. Replace them with our own that @@ -82,57 +85,54 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s `export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD`, `id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`, + `section 'Step 2 of 7: Finding data and tools...'`, + // Expect Postgres executables at the Red Hat paths. - `[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]`, - `[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]`, - `[[ -d /pgdata/pg"${old_version}" ]]`, + `old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]]`, + `new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]]`, + `old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]`, + `new_data="${data_volume}/pg${new_version}"`, + + // pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15. + // Change to a writable working directory to be compatible with PostgreSQL v14 and earlier. + // + // https://www.postgresql.org/docs/release/15#id-1.11.6.20.5.11.3 + `cd "${data_volume}"`, // Below is the pg_upgrade script used to upgrade a PostgresCluster from // one major version to another. Additional information concerning the // steps used and command flag specifics can be found in the documentation: // - https://www.postgresql.org/docs/current/pgupgrade.html - // To begin, we first move to the mounted /pgdata directory and create a - // new version directory which is then initialized with the initdb command. - `cd /pgdata || exit`, - `echo -e "Step 1: Making new pgdata directory...\n"`, - `mkdir /pgdata/pg"${new_version}"`, - `echo -e "Step 2: Initializing new pgdata directory...\n"`, - initdb, - - // Before running the upgrade check, which ensures the clusters are compatible, - // proper permissions have to be set on the old pgdata directory and the - // preload library settings must be copied over. - `echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"`, - `chmod 750 /pgdata/pg"${old_version}"`, - `echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"`, - `echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \`, - `/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf`, - - // Before the actual upgrade is run, we will run the upgrade --check to - // verify everything before actually changing any data. - `echo -e "Step 5: Running pg_upgrade check...\n"`, - `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`, - `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\`, - ` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs, - - // Assuming the check completes successfully, the pg_upgrade command will - // be run that actually prepares the upgraded pgdata directory. - `echo -e "\nStep 6: Running pg_upgrade...\n"`, - `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`, - `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \`, - `--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs, - - // Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json - // from the old data dir to help retain PostgreSQL parameters you had set before. - // - https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version - `echo -e "\nStep 7: Copying patroni.dynamic.json...\n"`, - `cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"`, - - `echo -e "\npg_upgrade Job Complete!"`, + `section 'Step 3 of 7: Initializing new data directory...'`, + `PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums` + argEncryptionKeyCommand, + + // Read the configured value then quote it; every single-quote U+0027 is replaced by two. + // + // https://www.postgresql.org/docs/current/config-setting.html + // https://www.gnu.org/software/bash/manual/bash.html#ANSI_002dC-Quoting + `section 'Step 4 of 7: Copying shared_preload_libraries parameter...'`, + `value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)`, + `echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"`, + + `section 'Step 5 of 7: Checking for potential issues...'`, + `"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \`, + `--old-bindir="${old_bin}" --old-datadir="${old_data}" \`, + `--new-bindir="${new_bin}" --new-datadir="${new_data}"`, + + `section 'Step 6 of 7: Performing upgrade...'`, + `(set -x && time "${new_bin}/pg_upgrade"` + argMethod + argJobs + ` \`, + `--old-bindir="${old_bin}" --old-datadir="${old_data}" \`, + `--new-bindir="${new_bin}" --new-datadir="${new_data}")`, + + // https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version + `section 'Step 7 of 7: Copying Patroni settings...'`, + `(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")`, + + `section 'Success!'`, }, "\n") - return append([]string{"bash", "-ceu", "--", script, "upgrade"}, args...) + return append([]string{"bash", "-c", "--", script, "upgrade"}, args...) } // largestWholeCPU returns the maximum CPU request or limit as a non-negative @@ -238,38 +238,37 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( // We currently target the `pgdata/pg{old_version}` and `pgdata/pg{old_version}_wal` // directories for removal. func removeDataCommand(upgrade *v1beta1.PGUpgrade) []string { - oldVersion := fmt.Sprint(upgrade.Spec.FromPostgresVersion) + oldVersion := upgrade.Spec.FromPostgresVersion // Before removing the directories (both data and wal), we check that // the directory is not in use by running `pg_controldata` and making sure // the server state is "shut down in recovery" - // TODO(benjaminjb): pg_controldata seems pretty stable, but might want to - // experiment with a few more versions. - args := []string{oldVersion} + args := []string{fmt.Sprint(oldVersion)} script := strings.Join([]string{ - `declare -r old_version="$1"`, - `printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"`, - `echo -e "Checking the directory exists and isn't being used...\n"`, - `cd /pgdata || exit`, - // The string `shut down in recovery` is the dbstate that postgres sets from - // at least version 10 to 14 when a replica has been shut down. - // - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_controldata/pg_controldata.c;h=f911f98d946d83f1191abf35239d9b4455c5f52a;hb=HEAD#l59 - // Note: `pg_controldata` is actually used by `pg_upgrade` before upgrading - // to make sure that the server in question is shut down as a primary; - // that aligns with our use here, where we're making sure that the server in question - // was shut down as a replica. - // - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_upgrade/controldata.c;h=41b8f69b8cbe4f40e6098ad84c2e8e987e24edaf;hb=HEAD#l122 - `if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi`, - `echo -e "Removing old pgdata directory...\n"`, - // When deleting the wal directory, use `realpath` to resolve the symlink from - // the pgdata directory. This is necessary because the wal directory can be - // mounted at different places depending on if an external wal PVC is used, - // i.e. `/pgdata/pg14_wal` vs `/pgwal/pg14_wal` - `rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"`, - `echo -e "Remove Data Job Complete!"`, + // Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable. + `shopt -so errexit nounset`, + + `declare -r data_volume='/pgdata' old_version="$1"`, + `printf 'Removing PostgreSQL %s data...\n\n' "$@"`, + `delete() (set -x && rm -rf -- "$@")`, + + `old_data="${data_volume}/pg${old_version}"`, + `control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")`, + `read -r state <<< "${control##*cluster state:}"`, + + // We expect exactly one state for a replica that has been stopped. + // + // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_controldata/pg_controldata.c#l55 + // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/bin/pg_controldata/pg_controldata.c#l58 + `[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }`, + + // "rm" does not follow symbolic links. + // Delete the old data directory after subdirectories that contain versioned data. + `delete "${old_data}/pg_wal/"`, + `delete "${old_data}" && echo 'Success!'`, }, "\n") - return append([]string{"bash", "-ceu", "--", script, "remove"}, args...) + return append([]string{"bash", "-c", "--", script, "remove"}, args...) } // generateRemoveDataJob returns a Job that can remove the data diff --git a/internal/controller/pgupgrade/jobs_test.go b/internal/controller/pgupgrade/jobs_test.go index 5b1f6bc4f8..7ce22be378 100644 --- a/internal/controller/pgupgrade/jobs_test.go +++ b/internal/controller/pgupgrade/jobs_test.go @@ -87,7 +87,7 @@ func TestUpgradeCommand(t *testing.T) { spec := &v1beta1.PGUpgradeSettings{Jobs: tt.Spec} command := upgradeCommand(spec, "") assert.Assert(t, len(command) > 3) - assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3]) + assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3]) script := command[3] assert.Assert(t, cmp.Contains(script, tt.Args)) @@ -111,7 +111,7 @@ func TestUpgradeCommand(t *testing.T) { spec := &v1beta1.PGUpgradeSettings{TransferMethod: tt.Spec} command := upgradeCommand(spec, "") assert.Assert(t, len(command) > 3) - assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3]) + assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3]) script := command[3] assert.Assert(t, cmp.Contains(script, tt.Args)) @@ -196,11 +196,14 @@ spec: containers: - command: - bash - - -ceu + - -c - -- - |- + shopt -so errexit nounset declare -r data_volume='/pgdata' old_version="$1" new_version="$2" - printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@" + printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@" + section() { printf '\n\n%s\n' "$@"; } + section 'Step 1 of 7: Ensuring username is postgres...' gid=$(id -G); NSS_WRAPPER_GROUP=$(mktemp) (sed "/^postgres:x:/ d; /^[^:]*:x:${gid%% *}:/ d" /etc/group echo "postgres:x:${gid%% *}:") > "${NSS_WRAPPER_GROUP}" @@ -209,30 +212,28 @@ spec: echo "postgres:x:${uid}:${gid%% *}::${data_volume}:") > "${NSS_WRAPPER_PASSWD}" export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]] - [[ -x /usr/pgsql-"${old_version}"/bin/postgres ]] - [[ -x /usr/pgsql-"${new_version}"/bin/initdb ]] - [[ -d /pgdata/pg"${old_version}" ]] - cd /pgdata || exit - echo -e "Step 1: Making new pgdata directory...\n" - mkdir /pgdata/pg"${new_version}" - echo -e "Step 2: Initializing new pgdata directory...\n" - /usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" - echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n" - chmod 750 /pgdata/pg"${old_version}" - echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n" - echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \ - /pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf - echo -e "Step 5: Running pg_upgrade check...\n" - time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \ - --new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\ - --new-datadir /pgdata/pg"${new_version}" --check --link --jobs=1 - echo -e "\nStep 6: Running pg_upgrade...\n" - time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \ - --new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \ - --new-datadir /pgdata/pg"${new_version}" --link --jobs=1 - echo -e "\nStep 7: Copying patroni.dynamic.json...\n" - cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}" - echo -e "\npg_upgrade Job Complete!" + section 'Step 2 of 7: Finding data and tools...' + old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]] + new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]] + old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]] + new_data="${data_volume}/pg${new_version}" + cd "${data_volume}" + section 'Step 3 of 7: Initializing new data directory...' + PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums + section 'Step 4 of 7: Copying shared_preload_libraries parameter...' + value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries) + echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'" + section 'Step 5 of 7: Checking for potential issues...' + "${new_bin}/pg_upgrade" --check --link --jobs=1 \ + --old-bindir="${old_bin}" --old-datadir="${old_data}" \ + --new-bindir="${new_bin}" --new-datadir="${new_data}" + section 'Step 6 of 7: Performing upgrade...' + (set -x && time "${new_bin}/pg_upgrade" --link --jobs=1 \ + --old-bindir="${old_bin}" --old-datadir="${old_data}" \ + --new-bindir="${new_bin}" --new-datadir="${new_data}") + section 'Step 7 of 7: Copying Patroni settings...' + (set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}") + section 'Success!' - upgrade - "19" - "25" @@ -267,7 +268,7 @@ status: {} tdeJob := reconciler.generateUpgradeJob(ctx, upgrade, startup, "echo testKey") assert.Assert(t, cmp.MarshalContains(tdeJob, - `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`)) + `PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums --encryption-key-command='echo testKey'`)) } func TestGenerateRemoveDataJob(t *testing.T) { @@ -343,17 +344,19 @@ spec: containers: - command: - bash - - -ceu + - -c - -- - |- - declare -r old_version="$1" - printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@" - echo -e "Checking the directory exists and isn't being used...\n" - cd /pgdata || exit - if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi - echo -e "Removing old pgdata directory...\n" - rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)" - echo -e "Remove Data Job Complete!" + shopt -so errexit nounset + declare -r data_volume='/pgdata' old_version="$1" + printf 'Removing PostgreSQL %s data...\n\n' "$@" + delete() (set -x && rm -rf -- "$@") + old_data="${data_volume}/pg${old_version}" + control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}") + read -r state <<< "${control##*cluster state:}" + [[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; } + delete "${old_data}/pg_wal/" + delete "${old_data}" && echo 'Success!' - remove - "19" image: img4 From be40084f5ed80ecc812d9ac660eaa28e9541ac1c Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 26 Sep 2025 16:08:14 -0500 Subject: [PATCH 3/3] Look in more directories for upgrade binaries This makes major upgrades compatible with images from other distros. Issue: PGO-864 --- internal/controller/pgupgrade/jobs.go | 26 ++++++++++++++---- internal/controller/pgupgrade/jobs_test.go | 10 ++++--- internal/postgres/config.go | 11 ++++++++ internal/postgres/config_test.go | 31 ++++++++++++++++++++++ 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index ac71e4a0a6..9dbf76ea5c 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -21,6 +21,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/internal/shell" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -86,13 +87,25 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s `id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`, `section 'Step 2 of 7: Finding data and tools...'`, - - // Expect Postgres executables at the Red Hat paths. - `old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]]`, - `new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]]`, `old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]`, `new_data="${data_volume}/pg${new_version}"`, + // Search for Postgres executables matching the old and new versions. + // Use `command -v` to look through all of PATH, then trim the executable name from the absolute path. + `old_bin=$(` + postgres.ShellPath(oldVersion) + ` && command -v postgres)`, + `old_bin="${old_bin%/postgres}"`, + `new_bin=$(` + postgres.ShellPath(newVersion) + ` && command -v pg_upgrade)`, + `new_bin="${new_bin%/pg_upgrade}"`, + + // The executables found might not be the versions we need, so do a cursory check before writing to disk. + // pg_upgrade checks every executable thoroughly since PostgreSQL v14. + // + // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_upgrade/exec.c#l355 + // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_14_0;f=src/bin/pg_upgrade/exec.c#l358 + // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_18_0;f=src/bin/pg_upgrade/exec.c#l370 + `(set -x && [[ "$("${old_bin}/postgres" --version)" =~ ") ${old_version}"($|[^0-9]) ]])`, + `(set -x && [[ "$("${new_bin}/initdb" --version)" =~ ") ${new_version}"($|[^0-9]) ]])`, + // pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15. // Change to a writable working directory to be compatible with PostgreSQL v14 and earlier. // @@ -115,6 +128,9 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s `value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)`, `echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"`, + // NOTE: The default for --new-bindir is the directory of pg_upgrade since PostgreSQL v13. + // + // https://www.postgresql.org/docs/release/13#id-1.11.6.28.5.11 `section 'Step 5 of 7: Checking for potential issues...'`, `"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \`, `--old-bindir="${old_bin}" --old-datadir="${old_data}" \`, @@ -253,7 +269,7 @@ func removeDataCommand(upgrade *v1beta1.PGUpgrade) []string { `delete() (set -x && rm -rf -- "$@")`, `old_data="${data_volume}/pg${old_version}"`, - `control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")`, + `control=$(` + postgres.ShellPath(oldVersion) + ` && LC_ALL=C pg_controldata "${old_data}")`, `read -r state <<< "${control##*cluster state:}"`, // We expect exactly one state for a replica that has been stopped. diff --git a/internal/controller/pgupgrade/jobs_test.go b/internal/controller/pgupgrade/jobs_test.go index 7ce22be378..9c9e000631 100644 --- a/internal/controller/pgupgrade/jobs_test.go +++ b/internal/controller/pgupgrade/jobs_test.go @@ -213,10 +213,14 @@ spec: export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]] section 'Step 2 of 7: Finding data and tools...' - old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]] - new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]] old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]] new_data="${data_volume}/pg${new_version}" + old_bin=$(PATH="/usr/lib/postgresql/19/bin:/usr/libexec/postgresql19:/usr/pgsql-19/bin${PATH+:${PATH}}" && command -v postgres) + old_bin="${old_bin%/postgres}" + new_bin=$(PATH="/usr/lib/postgresql/25/bin:/usr/libexec/postgresql25:/usr/pgsql-25/bin${PATH+:${PATH}}" && command -v pg_upgrade) + new_bin="${new_bin%/pg_upgrade}" + (set -x && [[ "$("${old_bin}/postgres" --version)" =~ ") ${old_version}"($|[^0-9]) ]]) + (set -x && [[ "$("${new_bin}/initdb" --version)" =~ ") ${new_version}"($|[^0-9]) ]]) cd "${data_volume}" section 'Step 3 of 7: Initializing new data directory...' PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums @@ -352,7 +356,7 @@ spec: printf 'Removing PostgreSQL %s data...\n\n' "$@" delete() (set -x && rm -rf -- "$@") old_data="${data_volume}/pg${old_version}" - control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}") + control=$(PATH="/usr/lib/postgresql/19/bin:/usr/libexec/postgresql19:/usr/pgsql-19/bin${PATH+:${PATH}}" && LC_ALL=C pg_controldata "${old_data}") read -r state <<< "${control##*cluster state:}" [[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; } delete "${old_data}/pg_wal/" diff --git a/internal/postgres/config.go b/internal/postgres/config.go index 75371e6af6..ebefc9dd6c 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -264,6 +264,17 @@ func Environment(cluster *v1beta1.PostgresCluster) []corev1.EnvVar { } } +// ShellPath returns a POSIX shell command that prepends typical Postgres executable paths to the PATH variable. +func ShellPath(postgresVersion int32) string { + return fmt.Sprintf(`PATH="`+ + strings.Join([]string{ + `/usr/lib/postgresql/%[1]d/bin`, // Debian + `/usr/libexec/postgresql%[1]d`, // Alpine + `/usr/pgsql-%[1]d/bin`, // Red Hat + }, ":")+ + `${PATH+:${PATH}}"`, postgresVersion) +} + // reloadCommand returns an entrypoint that convinces PostgreSQL to reload // certificate files when they change. The process will appear as name in `ps` // and `top`. diff --git a/internal/postgres/config_test.go b/internal/postgres/config_test.go index c0960ac27e..ffd227f4b8 100644 --- a/internal/postgres/config_test.go +++ b/internal/postgres/config_test.go @@ -547,6 +547,37 @@ func TestBashSafeLink(t *testing.T) { }) } +func TestShellPath(t *testing.T) { + t.Parallel() + + script := ShellPath(11) + + assert.Assert(t, cmp.Contains(script, `/usr/lib/postgresql/11/bin`)) + assert.Assert(t, cmp.Contains(script, `/usr/libexec/postgresql11`)) + assert.Assert(t, cmp.Contains(script, `/usr/pgsql-11/bin`)) + + t.Run("ShellCheckPOSIX", func(t *testing.T) { + shellcheck := require.ShellCheck(t) + + dir := t.TempDir() + file := filepath.Join(dir, "script.sh") + assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) + + // Expect ShellCheck for "sh" to be happy. + // - https://www.shellcheck.net/wiki/SC2148 + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) + output, err := cmd.CombinedOutput() + assert.NilError(t, err, "%q\n%s", cmd.Args, output) + }) + + t.Run("PrettyYAML", func(t *testing.T) { + b, err := yaml.Marshal(script) + assert.NilError(t, err) + assert.Assert(t, !strings.Contains(string(b), `\n`), "expected literal flow scalar, got:\n%s", b) + assert.Equal(t, 1, strings.Count(string(b), "\n"), "expected one trailing newline, got:\n%s", b) + }) +} + func TestStartupCommand(t *testing.T) { shellcheck := require.ShellCheck(t) t.Parallel()