Skip to content

Commit 07ac87a

Browse files
committed
Simplify upgrade scripts with more variables
I find this Go code and resulting Bash easier to read. This also logs more about what is happening without changing the sequence of commands. The script included an unnecessary `|| exit`, so I moved the `set -eu` out of the Bash invocation into the script itself to make that behavior more obvious.
1 parent 4c83444 commit 07ac87a

File tree

2 files changed

+118
-113
lines changed

2 files changed

+118
-113
lines changed

internal/controller/pgupgrade/jobs.go

Lines changed: 76 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ import (
2121
"github.com/crunchydata/postgres-operator/internal/feature"
2222
"github.com/crunchydata/postgres-operator/internal/initialize"
2323
"github.com/crunchydata/postgres-operator/internal/naming"
24+
"github.com/crunchydata/postgres-operator/internal/shell"
2425
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
2526
)
2627

27-
// Upgrade job
28-
2928
// pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to
3029
// upgrade from one major PostgreSQL version to another
3130
func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta {
@@ -48,20 +47,24 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
4847
oldVersion := spec.FromPostgresVersion
4948
newVersion := spec.ToPostgresVersion
5049

51-
// if the fetch key command is set for TDE, provide the value during initialization
52-
initdb := `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"`
50+
var argEncryptionKeyCommand string
5351
if fetchKeyCommand != "" {
54-
initdb += ` --encryption-key-command "` + fetchKeyCommand + `"`
52+
argEncryptionKeyCommand = ` --encryption-key-command=` + shell.QuoteWord(fetchKeyCommand)
5553
}
5654

5755
args := []string{fmt.Sprint(oldVersion), fmt.Sprint(newVersion)}
5856
script := strings.Join([]string{
57+
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
58+
`shopt -so errexit nounset`,
59+
5960
`declare -r data_volume='/pgdata' old_version="$1" new_version="$2"`,
60-
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"`,
61+
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"`,
62+
`section() { printf '\n\n%s\n' "$@"; }`,
6163

62-
// Note: Rather than import the nss_wrapper init container, as we do in
63-
// the main postgres-operator, this job does the required nss_wrapper
64+
// NOTE: Rather than import the nss_wrapper init container, as we do in
65+
// the PostgresCluster controller, this job does the required nss_wrapper
6466
// settings here.
67+
`section 'Step 1 of 7: Ensuring username is postgres...'`,
6568

6669
// Create a copy of the system group definitions, but remove the "postgres"
6770
// group or any group with the current GID. Replace them with our own that
@@ -82,57 +85,56 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
8285
`export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD`,
8386
`id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`,
8487

88+
`section 'Step 2 of 7: Finding data and tools...'`,
89+
8590
// Expect Postgres executables at the Red Hat paths.
86-
`[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]`,
87-
`[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]`,
88-
`[[ -d /pgdata/pg"${old_version}" ]]`,
91+
`old_data="${data_volume}/pg${old_version}"`,
92+
`new_data="${data_volume}/pg${new_version}"`,
93+
`old_bin="/usr/pgsql-${old_version}/bin"`,
94+
`new_bin="/usr/pgsql-${new_version}/bin"`,
95+
96+
`[[ -x "${old_bin}/postgres" && -x "${new_bin}/initdb" && -d "${old_data}" ]]`,
97+
98+
// pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15.
99+
// Change to a writable working directory to be compatible with PostgreSQL v14 and earlier.
100+
//
101+
// https://www.postgresql.org/docs/release/15#id-1.11.6.20.5.11.3
102+
`cd "${data_volume}"`,
89103

90104
// Below is the pg_upgrade script used to upgrade a PostgresCluster from
91105
// one major version to another. Additional information concerning the
92106
// steps used and command flag specifics can be found in the documentation:
93107
// - https://www.postgresql.org/docs/current/pgupgrade.html
94108

95-
// To begin, we first move to the mounted /pgdata directory and create a
96-
// new version directory which is then initialized with the initdb command.
97-
`cd /pgdata || exit`,
98-
`echo -e "Step 1: Making new pgdata directory...\n"`,
99-
`mkdir /pgdata/pg"${new_version}"`,
100-
`echo -e "Step 2: Initializing new pgdata directory...\n"`,
101-
initdb,
102-
103-
// Before running the upgrade check, which ensures the clusters are compatible,
104-
// proper permissions have to be set on the old pgdata directory and the
105-
// preload library settings must be copied over.
106-
`echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"`,
107-
`chmod 750 /pgdata/pg"${old_version}"`,
108-
`echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"`,
109-
`echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \`,
110-
`/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf`,
111-
112-
// Before the actual upgrade is run, we will run the upgrade --check to
113-
// verify everything before actually changing any data.
114-
`echo -e "Step 5: Running pg_upgrade check...\n"`,
115-
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
116-
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\`,
117-
` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs,
118-
119-
// Assuming the check completes successfully, the pg_upgrade command will
120-
// be run that actually prepares the upgraded pgdata directory.
121-
`echo -e "\nStep 6: Running pg_upgrade...\n"`,
122-
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
123-
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \`,
124-
`--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs,
125-
126-
// Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json
127-
// from the old data dir to help retain PostgreSQL parameters you had set before.
128-
// - https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
129-
`echo -e "\nStep 7: Copying patroni.dynamic.json...\n"`,
130-
`cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"`,
131-
132-
`echo -e "\npg_upgrade Job Complete!"`,
109+
`section 'Step 3 of 7: Initializing new data directory...'`,
110+
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums` + argEncryptionKeyCommand,
111+
112+
// Read the configured value then quote it; every single-quote U+0027 is replaced by two.
113+
//
114+
// https://www.postgresql.org/docs/current/config-setting.html
115+
// https://www.gnu.org/software/bash/manual/bash.html#ANSI_002dC-Quoting
116+
`section 'Step 4 of 7: Copying shared_preload_libraries parameter...'`,
117+
`value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)`,
118+
`echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"`,
119+
120+
`section 'Step 5 of 7: Checking for potential issues...'`,
121+
`"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \`,
122+
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
123+
`--new-bindir="${new_bin}" --new-datadir="${new_data}"`,
124+
125+
`section 'Step 6 of 7: Performing upgrade...'`,
126+
`(set -x && time "${new_bin}/pg_upgrade"` + argMethod + argJobs + ` \`,
127+
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
128+
`--new-bindir="${new_bin}" --new-datadir="${new_data}")`,
129+
130+
// https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
131+
`section 'Step 7 of 7: Copying Patroni settings...'`,
132+
`(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")`,
133+
134+
`section 'Success!'`,
133135
}, "\n")
134136

135-
return append([]string{"bash", "-ceu", "--", script, "upgrade"}, args...)
137+
return append([]string{"bash", "-c", "--", script, "upgrade"}, args...)
136138
}
137139

138140
// largestWholeCPU returns the maximum CPU request or limit as a non-negative
@@ -238,38 +240,37 @@ func (r *PGUpgradeReconciler) generateUpgradeJob(
238240
// We currently target the `pgdata/pg{old_version}` and `pgdata/pg{old_version}_wal`
239241
// directories for removal.
240242
func removeDataCommand(upgrade *v1beta1.PGUpgrade) []string {
241-
oldVersion := fmt.Sprint(upgrade.Spec.FromPostgresVersion)
243+
oldVersion := upgrade.Spec.FromPostgresVersion
242244

243245
// Before removing the directories (both data and wal), we check that
244246
// the directory is not in use by running `pg_controldata` and making sure
245247
// the server state is "shut down in recovery"
246-
// TODO(benjaminjb): pg_controldata seems pretty stable, but might want to
247-
// experiment with a few more versions.
248-
args := []string{oldVersion}
248+
args := []string{fmt.Sprint(oldVersion)}
249249
script := strings.Join([]string{
250-
`declare -r old_version="$1"`,
251-
`printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"`,
252-
`echo -e "Checking the directory exists and isn't being used...\n"`,
253-
`cd /pgdata || exit`,
254-
// The string `shut down in recovery` is the dbstate that postgres sets from
255-
// at least version 10 to 14 when a replica has been shut down.
256-
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_controldata/pg_controldata.c;h=f911f98d946d83f1191abf35239d9b4455c5f52a;hb=HEAD#l59
257-
// Note: `pg_controldata` is actually used by `pg_upgrade` before upgrading
258-
// to make sure that the server in question is shut down as a primary;
259-
// that aligns with our use here, where we're making sure that the server in question
260-
// was shut down as a replica.
261-
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_upgrade/controldata.c;h=41b8f69b8cbe4f40e6098ad84c2e8e987e24edaf;hb=HEAD#l122
262-
`if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi`,
263-
`echo -e "Removing old pgdata directory...\n"`,
264-
// When deleting the wal directory, use `realpath` to resolve the symlink from
265-
// the pgdata directory. This is necessary because the wal directory can be
266-
// mounted at different places depending on if an external wal PVC is used,
267-
// i.e. `/pgdata/pg14_wal` vs `/pgwal/pg14_wal`
268-
`rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"`,
269-
`echo -e "Remove Data Job Complete!"`,
250+
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
251+
`shopt -so errexit nounset`,
252+
253+
`declare -r data_volume='/pgdata' old_version="$1"`,
254+
`printf 'Removing PostgreSQL %s data...\n\n' "$@"`,
255+
`delete() (set -x && rm -rf -- "$@")`,
256+
257+
`old_data="${data_volume}/pg${old_version}"`,
258+
`control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")`,
259+
`read -r state <<< "${control##*cluster state:}"`,
260+
261+
// We expect exactly one state for a replica that has been stopped.
262+
//
263+
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_controldata/pg_controldata.c#l55
264+
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/bin/pg_controldata/pg_controldata.c#l58
265+
`[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }`,
266+
267+
// "rm" does not follow symbolic links.
268+
// Delete the old data directory after subdirectories that contain versioned data.
269+
`delete "${old_data}/pg_wal/"`,
270+
`delete "${old_data}" && echo 'Success!'`,
270271
}, "\n")
271272

272-
return append([]string{"bash", "-ceu", "--", script, "remove"}, args...)
273+
return append([]string{"bash", "-c", "--", script, "remove"}, args...)
273274
}
274275

275276
// generateRemoveDataJob returns a Job that can remove the data

internal/controller/pgupgrade/jobs_test.go

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func TestUpgradeCommand(t *testing.T) {
8787
spec := &v1beta1.PGUpgradeSettings{Jobs: tt.Spec}
8888
command := upgradeCommand(spec, "")
8989
assert.Assert(t, len(command) > 3)
90-
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
90+
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])
9191

9292
script := command[3]
9393
assert.Assert(t, cmp.Contains(script, tt.Args))
@@ -111,7 +111,7 @@ func TestUpgradeCommand(t *testing.T) {
111111
spec := &v1beta1.PGUpgradeSettings{TransferMethod: tt.Spec}
112112
command := upgradeCommand(spec, "")
113113
assert.Assert(t, len(command) > 3)
114-
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
114+
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])
115115

116116
script := command[3]
117117
assert.Assert(t, cmp.Contains(script, tt.Args))
@@ -196,11 +196,14 @@ spec:
196196
containers:
197197
- command:
198198
- bash
199-
- -ceu
199+
- -c
200200
- --
201201
- |-
202+
shopt -so errexit nounset
202203
declare -r data_volume='/pgdata' old_version="$1" new_version="$2"
203-
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"
204+
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"
205+
section() { printf '\n\n%s\n' "$@"; }
206+
section 'Step 1 of 7: Ensuring username is postgres...'
204207
gid=$(id -G); NSS_WRAPPER_GROUP=$(mktemp)
205208
(sed "/^postgres:x:/ d; /^[^:]*:x:${gid%% *}:/ d" /etc/group
206209
echo "postgres:x:${gid%% *}:") > "${NSS_WRAPPER_GROUP}"
@@ -209,30 +212,29 @@ spec:
209212
echo "postgres:x:${uid}:${gid%% *}::${data_volume}:") > "${NSS_WRAPPER_PASSWD}"
210213
export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD
211214
id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]
212-
[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]
213-
[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]
214-
[[ -d /pgdata/pg"${old_version}" ]]
215-
cd /pgdata || exit
216-
echo -e "Step 1: Making new pgdata directory...\n"
217-
mkdir /pgdata/pg"${new_version}"
218-
echo -e "Step 2: Initializing new pgdata directory...\n"
219-
/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"
220-
echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"
221-
chmod 750 /pgdata/pg"${old_version}"
222-
echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"
223-
echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \
224-
/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf
225-
echo -e "Step 5: Running pg_upgrade check...\n"
226-
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
227-
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\
228-
--new-datadir /pgdata/pg"${new_version}" --check --link --jobs=1
229-
echo -e "\nStep 6: Running pg_upgrade...\n"
230-
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
231-
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \
232-
--new-datadir /pgdata/pg"${new_version}" --link --jobs=1
233-
echo -e "\nStep 7: Copying patroni.dynamic.json...\n"
234-
cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"
235-
echo -e "\npg_upgrade Job Complete!"
215+
section 'Step 2 of 7: Finding data and tools...'
216+
old_data="${data_volume}/pg${old_version}"
217+
new_data="${data_volume}/pg${new_version}"
218+
old_bin="/usr/pgsql-${old_version}/bin"
219+
new_bin="/usr/pgsql-${new_version}/bin"
220+
[[ -x "${old_bin}/postgres" && -x "${new_bin}/initdb" && -d "${old_data}" ]]
221+
cd "${data_volume}"
222+
section 'Step 3 of 7: Initializing new data directory...'
223+
PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums
224+
section 'Step 4 of 7: Copying shared_preload_libraries parameter...'
225+
value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)
226+
echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"
227+
section 'Step 5 of 7: Checking for potential issues...'
228+
"${new_bin}/pg_upgrade" --check --link --jobs=1 \
229+
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
230+
--new-bindir="${new_bin}" --new-datadir="${new_data}"
231+
section 'Step 6 of 7: Performing upgrade...'
232+
(set -x && time "${new_bin}/pg_upgrade" --link --jobs=1 \
233+
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
234+
--new-bindir="${new_bin}" --new-datadir="${new_data}")
235+
section 'Step 7 of 7: Copying Patroni settings...'
236+
(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")
237+
section 'Success!'
236238
- upgrade
237239
- "19"
238240
- "25"
@@ -267,7 +269,7 @@ status: {}
267269

268270
tdeJob := reconciler.generateUpgradeJob(ctx, upgrade, startup, "echo testKey")
269271
assert.Assert(t, cmp.MarshalContains(tdeJob,
270-
`/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`))
272+
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums --encryption-key-command='echo testKey'`))
271273
}
272274

273275
func TestGenerateRemoveDataJob(t *testing.T) {
@@ -343,17 +345,19 @@ spec:
343345
containers:
344346
- command:
345347
- bash
346-
- -ceu
348+
- -c
347349
- --
348350
- |-
349-
declare -r old_version="$1"
350-
printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"
351-
echo -e "Checking the directory exists and isn't being used...\n"
352-
cd /pgdata || exit
353-
if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi
354-
echo -e "Removing old pgdata directory...\n"
355-
rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"
356-
echo -e "Remove Data Job Complete!"
351+
shopt -so errexit nounset
352+
declare -r data_volume='/pgdata' old_version="$1"
353+
printf 'Removing PostgreSQL %s data...\n\n' "$@"
354+
delete() (set -x && rm -rf -- "$@")
355+
old_data="${data_volume}/pg${old_version}"
356+
control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")
357+
read -r state <<< "${control##*cluster state:}"
358+
[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }
359+
delete "${old_data}/pg_wal/"
360+
delete "${old_data}" && echo 'Success!'
357361
- remove
358362
- "19"
359363
image: img4

0 commit comments

Comments
 (0)