From 5c69762669e0c81f5b86b7e273312775d55cad95 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 15 Aug 2025 16:48:47 -0500 Subject: [PATCH 1/4] Add a Bash function to create Postgres tablespace and data directories The commands for these were intentionally similar, and now they are the same and documented together. Issue: PGO-2558 --- internal/postgres/config.go | 130 +++++++++++++--------------- internal/postgres/reconcile_test.go | 22 ++--- 2 files changed, 66 insertions(+), 86 deletions(-) diff --git a/internal/postgres/config.go b/internal/postgres/config.go index 65c26dec6d..e4a62ae798 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -21,6 +21,38 @@ import ( ) const ( + // bashDataDirectory is a Bash function that ensures a directory has the correct permissions for PostgreSQL data. + // + // Postgres requires its data directories be writable by only itself. + // Pod "securityContext.fsGroup" sets g+w on directories for *some* storage providers. + // Ensure the current user owns the directory, and remove group-write permission. + // + // - https://www.postgresql.org/docs/current/creating-cluster.html + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/postmaster/postmaster.c;hb=REL_10_0#l1522 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_11_0#l142 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_17_0#l386 + // - https://issue.k8s.io/93802#issuecomment-717646167 + // + // During CREATE TABLESPACE, Postgres sets the permissions of a tablespace directory to match the data directory. + // + // - https://www.postgresql.org/docs/current/manage-ag-tablespaces.html + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/commands/tablespace.c;hb=REL_14_0#l600 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/common/file_perm.c;hb=REL_14_0#l27 + // + bashDataDirectory = `dataDirectory() {` + + // When the directory does not exist, create it with the correct permissions. + // When the directory has the correct owner, set the correct permissions. + ` if [[ ! -e "$1" || -O "$1" ]]; then install --directory --mode=0750 "$1";` + + // + // The directory exists but its owner is wrong. + // When it is writable, the set-group-ID bit indicates that "fsGroup" probably ran on its contents making them safe to use. + // In this case, we can make a new directory (owned by this user) and refill it. + ` elif [[ -w "$1" && -g "$1" ]]; then recreate "$1" '0750';` + + // + // The directory exists, its owner is wrong, and it is not writable. + // This is probably fatal, but indicate failure to let the caller decide. + ` else false; fi; }` + // bashHalt is a Bash function that prints its arguments to stderr then // exits with a non-zero status. It uses the exit status of the prior // command if that was not zero. @@ -327,47 +359,31 @@ func startupCommand( version := fmt.Sprint(cluster.Spec.PostgresVersion) walDir := WALDirectory(cluster, instance) - // If the user requests tablespaces, we want to make sure the directories exist with the - // correct owner and permissions. - tablespaceCmd := "" - if feature.Enabled(ctx, feature.TablespaceVolumes) { - // This command checks if a dir exists and if not, creates it; - // if the dir does exist, then we `recreate` it to make sure the owner is correct; - // if the dir exists with the wrong owner and is not writeable, we error. - // This is the same behavior we use for the main PGDATA directory. - // Note: Postgres requires the tablespace directory to be "an existing, empty directory - // that is owned by the PostgreSQL operating system user." - // - https://www.postgresql.org/docs/current/manage-ag-tablespaces.html - // However, unlike the PGDATA directory, Postgres will _not_ error out - // if the permissions are wrong on the tablespace directory. - // Instead, when a tablespace is created in Postgres, Postgres will `chmod` the - // tablespace directory to match permissions on the PGDATA directory (either 700 or 750). - // Postgres setting the tablespace directory permissions: - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/commands/tablespace.c;hb=REL_14_0#l600 - // Postgres choosing directory permissions: - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/common/file_perm.c;hb=REL_14_0#l27 - // Note: This permission change seems to happen only when the tablespace is created in Postgres. - // If the user manually `chmod`'ed the directory after the creation of the tablespace, Postgres - // would not attempt to change the directory permissions. - // Note: as noted below, we mount the tablespace directory to the mountpoint `/tablespaces/NAME`, - // and so we add the subdirectory `data` in order to set the permissions. - checkInstallRecreateCmd := strings.Join([]string{ - `if [[ ! -e "${tablespace_dir}" || -O "${tablespace_dir}" ]]; then`, - `install --directory --mode=0750 "${tablespace_dir}"`, - `elif [[ -w "${tablespace_dir}" && -g "${tablespace_dir}" ]]; then`, - `recreate "${tablespace_dir}" '0750'`, - `else (halt Permissions!); fi ||`, - `halt "$(permissions "${tablespace_dir}" ||:)"`, - }, "\n") + mkdirs := make([]string, 0, 7+len(instance.TablespaceVolumes)) + mkdirs = append(mkdirs, `dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)"`) + // If the user requests tablespaces, we want to make sure the directories exist with the correct owner and permissions. + // + // The path for tablespaces volumes is /tablespaces/NAME/data -- the `data` directory is so we can arrange the permissions. + if feature.Enabled(ctx, feature.TablespaceVolumes) { for _, tablespace := range instance.TablespaceVolumes { - // The path for tablespaces volumes is /tablespaces/NAME/data - // -- the `data` path is added so that we can arrange the permissions. - tablespaceCmd = tablespaceCmd + "\ntablespace_dir=/tablespaces/" + tablespace.Name + "/data" + "\n" + - checkInstallRecreateCmd + dir := shell.QuoteWord("/tablespaces/" + tablespace.Name + "/data") + mkdirs = append(mkdirs, `dataDirectory `+dir+` || halt "$(permissions `+dir+` ||:)"`) } } + // These directories are outside "data_directory" and can be created. + mkdirs = append(mkdirs, + `(`+shell.MakeDirectories(dataMountPath, LogDirectory())+`) ||`, + `halt "$(permissions `+shell.QuoteWord(LogDirectory())+` ||:)"`, + + `(`+shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(naming.PatroniPGDataLogPath)+` ||:)"`, + + `(`+shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(naming.PGBackRestPGDataLogPath)+` ||:)"`, + ) + pg_rewind_override := "" if config.FetchKeyCommand(&cluster.Spec) != "" { // Quoting "EOF" disables parameter substitution during write. @@ -384,6 +400,9 @@ chmod +x /tmp/pg_rewind_tde.sh script := strings.Join([]string{ `declare -r expected_major_version="$1" pgwal_directory="$2"`, + // Function to create a Postgres data directory. + bashDataDirectory, + // Function to print the permissions of a file or directory and its parents. bashPermissions, @@ -425,42 +444,10 @@ chmod +x /tmp/pg_rewind_tde.sh // Determine if the data directory has been prepared for bootstrapping the cluster `bootstrap_dir="${postgres_data_directory}_bootstrap"`, - `[[ -d "${bootstrap_dir}" ]] && results 'bootstrap directory' "${bootstrap_dir}"`, - `[[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}"`, - - // PostgreSQL requires its directory to be writable by only itself. - // Pod "securityContext.fsGroup" sets g+w on directories for *some* - // storage providers. Ensure the current user owns the directory, and - // remove group-write permission. - // - https://www.postgresql.org/docs/current/creating-cluster.html - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/postmaster/postmaster.c;hb=REL_10_0#l1522 - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_11_0#l142 - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_17_0#l386 - // - https://issue.k8s.io/93802#issuecomment-717646167 - // - // When the directory does not exist, create it with the correct permissions. - // When the directory has the correct owner, set the correct permissions. - `if [[ ! -e "${postgres_data_directory}" || -O "${postgres_data_directory}" ]]; then`, - `install --directory --mode=0750 "${postgres_data_directory}"`, - // - // The directory exists but its owner is wrong. When it is writable, - // the set-group-ID bit indicates that "fsGroup" probably ran on its - // contents making them safe to use. In this case, we can make a new - // directory (owned by this user) and refill it. - `elif [[ -w "${postgres_data_directory}" && -g "${postgres_data_directory}" ]]; then`, - `recreate "${postgres_data_directory}" '0750'`, - // - // The directory exists, its owner is wrong, and it is not writable. - `else (halt Permissions!); fi ||`, - `halt "$(permissions "${postgres_data_directory}" ||:)"`, + `[[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" && results 'bootstrap directory' "${bootstrap_dir}"`, - // Create log directories. - `(` + shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, - `halt "$(permissions ` + naming.PGBackRestPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, - `halt "$(permissions ` + naming.PatroniPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(dataMountPath, LogDirectory()) + `) ||`, - `halt "$(permissions ` + LogDirectory() + ` ||:)"`, + // Create directories for and related to the data directory. + strings.Join(mkdirs, "\n"), // Copy replication client certificate files // from the /pgconf/tls/replication directory to the /tmp/replication directory in order @@ -478,7 +465,6 @@ chmod +x /tmp/pg_rewind_tde.sh // Add the pg_rewind wrapper script, if TDE is enabled. pg_rewind_override, - tablespaceCmd, // When the data directory is empty, there's nothing more to do. `[[ -f "${postgres_data_directory}/PG_VERSION" ]] || exit 0`, diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index c001ce890b..35a6ed900f 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -239,6 +239,7 @@ initContainers: - -- - |- declare -r expected_major_version="$1" pgwal_directory="$2" + dataDirectory() { if [[ ! -e "$1" || -O "$1" ]]; then install --directory --mode=0750 "$1"; elif [[ -w "$1" && -g "$1" ]]; then recreate "$1" '0750'; else false; fi; } permissions() { while [[ -n "$1" ]]; do set "${1%/*}" "$@"; done; shift; stat -Lc '%A %4u %4g %n' "$@"; } halt() { local rc=$?; >&2 echo "$@"; exit "${rc/#0/1}"; } results() { printf '::postgres-operator: %s::%s\n' "$@"; } @@ -267,23 +268,16 @@ initContainers: [[ "${postgres_data_directory}" == "${PGDATA}" ]] || halt Expected matching config and data directories bootstrap_dir="${postgres_data_directory}_bootstrap" - [[ -d "${bootstrap_dir}" ]] && results 'bootstrap directory' "${bootstrap_dir}" - [[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" - if [[ ! -e "${postgres_data_directory}" || -O "${postgres_data_directory}" ]]; then - install --directory --mode=0750 "${postgres_data_directory}" - elif [[ -w "${postgres_data_directory}" && -g "${postgres_data_directory}" ]]; then - recreate "${postgres_data_directory}" '0750' - else (halt Permissions!); fi || - halt "$(permissions "${postgres_data_directory}" ||:)" - (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || - halt "$(permissions /pgdata/pgbackrest/log ||:)" - (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || - halt "$(permissions /pgdata/patroni/log ||:)" + [[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" && results 'bootstrap directory' "${bootstrap_dir}" + dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)" (mkdir -p '/pgdata/logs/postgres' && { chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs' || :; }) || - halt "$(permissions /pgdata/logs/postgres ||:)" + halt "$(permissions '/pgdata/logs/postgres' ||:)" + (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || + halt "$(permissions '/pgdata/patroni/log' ||:)" + (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || + halt "$(permissions '/pgdata/pgbackrest/log' ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} - [[ -f "${postgres_data_directory}/PG_VERSION" ]] || exit 0 results 'data version' "${postgres_data_version:=$(< "${postgres_data_directory}/PG_VERSION")}" [[ "${postgres_data_version}" == "${expected_major_version}" ]] || From bc2fc7c08639c4c141f7f066c8cd4526d743954c Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Wed, 30 Jul 2025 13:57:15 -0500 Subject: [PATCH 2/4] Expand relative paths in shell.MakeDirectories This function needed defined behavior for relative paths. For example, the default log directory for Postgres is just "log" which it interprets relative to the data directory. Issue: PGO-2558 --- internal/collector/instance.go | 3 +-- internal/shell/paths.go | 19 ++++++++++++++++--- internal/shell/paths_test.go | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 9cb1708042..d6e6d153c8 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -7,7 +7,6 @@ package collector import ( "context" "fmt" - "path" corev1 "k8s.io/api/core/v1" @@ -185,7 +184,7 @@ func startCommand(logDirectories []string, includeLogrotate bool) []string { if len(logDirectories) != 0 { for _, logDir := range logDirectories { mkdirScript = mkdirScript + ` -` + shell.MakeDirectories(logDir, path.Join(logDir, "receiver")) +` + shell.MakeDirectories(logDir, "receiver") } } diff --git a/internal/shell/paths.go b/internal/shell/paths.go index 701144694a..8fcf6b3579 100644 --- a/internal/shell/paths.go +++ b/internal/shell/paths.go @@ -36,6 +36,8 @@ func CleanFileName(path string) string { // exists. It creates every directory leading to path from (but not including) // base and sets their permissions for Kubernetes, regardless of umask. // +// Relative paths are expanded relative to base. +// // See: // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/chmod.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/mkdir.html @@ -47,8 +49,19 @@ func MakeDirectories(base string, paths ...string) string { return `test -d ` + QuoteWord(base) } - allPaths := slices.Clone(paths) - for _, p := range paths { + // Expand each path relative to the base path. + expandedPaths := make([]string, len(paths)) + for i, p := range paths { + if filepath.IsAbs(p) { + expandedPaths[i] = p + } else { + expandedPaths[i] = filepath.Join(base, p) + } + } + + // Gather parent directories of each path. + allPaths := slices.Clone(expandedPaths) + for _, p := range expandedPaths { if r, err := filepath.Rel(base, p); err == nil && filepath.IsLocal(r) { // The result of [filepath.Rel] is a shorter representation of the full path; skip it. r = filepath.Dir(r) @@ -72,7 +85,7 @@ func MakeDirectories(base string, paths ...string) string { return `` + // Create all the paths and any missing parents. - `mkdir -p ` + strings.Join(QuoteWords(paths...), " ") + + `mkdir -p ` + strings.Join(QuoteWords(expandedPaths...), " ") + // Try to set the permissions of every path and each parent. // This swallows the exit status of `chmod` because not all filesystems diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index b5adb69b17..f3b9aea82d 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -94,6 +94,38 @@ func TestMakeDirectories(t *testing.T) { }) }) + t.Run("Relative", func(t *testing.T) { + assert.Equal(t, + MakeDirectories("/x", "one", "two/three"), + `mkdir -p '/x/one' '/x/two/three' && { chmod 0775 '/x/one' '/x/two/three' '/x/two' || :; }`, + "expected paths relative to base") + + assert.Equal(t, + MakeDirectories("/x/y/z", "../one", "./two", "../../../../three"), + `mkdir -p '/x/y/one' '/x/y/z/two' '/three' && { chmod 0775 '/x/y/one' '/x/y/z/two' '/three' || :; }`, + "expected paths relative to base") + + script := MakeDirectories("x/y", "../one", "./two", "../../../../three") + assert.Equal(t, script, + `mkdir -p 'x/one' 'x/y/two' '../../three' && { chmod 0775 'x/one' 'x/y/two' '../../three' || :; }`, + "expected paths relative to base") + + // Calling `mkdir -p '../..'` works, but run it by ShellCheck as a precaution. + t.Run("ShellCheckPOSIX", func(t *testing.T) { + shellcheck := require.ShellCheck(t) + + dir := t.TempDir() + file := filepath.Join(dir, "script.sh") + assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) + + // Expect ShellCheck for "sh" to be happy. + // - https://www.shellcheck.net/wiki/SC2148 + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) + output, err := cmd.CombinedOutput() + assert.NilError(t, err, "%q\n%s", cmd.Args, output) + }) + }) + t.Run("Unrelated", func(t *testing.T) { assert.Equal(t, MakeDirectories("/one", "/two/three/four"), From d77ae2b31018bc87145c180c903b95b458ddc3d4 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 15 Aug 2025 17:09:02 -0500 Subject: [PATCH 3/4] Set correct permissions on the specified log directory Controllers ignored the specified "log_directory" when preparing directories for Postgres. This parameter can be specified when the OpenTelemetryLogs gate is disabled. A relative path of more than one directory continues to fail during bootstrap. This does not change what happens when the OpenTelemetryLogs gate is enabled. In that case, controllers override the spec with their own value and prepare that directory. Issue: PGO-2558 --- internal/collector/postgres.go | 96 +++++++++++-------- internal/collector/postgres_test.go | 12 ++- .../controller/postgrescluster/controller.go | 2 +- .../controller/postgrescluster/instance.go | 9 +- .../controller/postgrescluster/postgres.go | 2 + internal/postgres/config.go | 38 ++++++-- internal/postgres/config_test.go | 13 ++- internal/postgres/parameters.go | 11 +++ internal/postgres/parameters_test.go | 2 + internal/postgres/reconcile.go | 3 +- internal/postgres/reconcile_test.go | 25 ++--- .../templates/clone-cluster.yaml | 15 +++ .../templates/point-in-time-restore.yaml | 15 ++- 13 files changed, 168 insertions(+), 75 deletions(-) diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index 892748c0a7..6dc2eddcda 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -19,9 +19,60 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +func PostgreSQLParameters(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outParameters *postgres.Parameters, +) { + version := inCluster.Spec.PostgresVersion + + if OpenTelemetryLogsEnabled(ctx, inCluster) { + var spec *v1beta1.InstrumentationLogsSpec + if inCluster != nil && inCluster.Spec.Instrumentation != nil { + spec = inCluster.Spec.Instrumentation.Logs + } + + // Retain logs for a short time unless specified. + retention := metav1.Duration{Duration: 24 * time.Hour} + if spec != nil && spec.RetentionPeriod != nil { + retention = spec.RetentionPeriod.AsDuration() + } + + // Rotate log files according to retention and name them for the OpenTelemetry Collector. + // + // The ".log" suffix is replaced by ".csv" for CSV log files, and + // the ".log" suffix is replaced by ".json" for JSON log files. + // + // https://www.postgresql.org/docs/current/runtime-config-logging.html + for k, v := range postgres.LogRotation(retention, "postgresql-", ".log") { + outParameters.Mandatory.Add(k, v) + } + + // Enable logging to file. Postgres uses a "logging collector" to safely write concurrent messages. + // NOTE: That collector is designed to not lose messages. When it is overloaded, other Postgres processes block. + // + // https://www.postgresql.org/docs/current/runtime-config-logging.html + outParameters.Mandatory.Add("logging_collector", "on") + + // PostgreSQL v8.3 adds support for CSV logging, and + // PostgreSQL v15 adds support for JSON logging. + // The latter is preferred because newlines are escaped as "\n", U+005C + U+006E. + if version >= 15 { + outParameters.Mandatory.Add("log_destination", "jsonlog") + } else { + outParameters.Mandatory.Add("log_destination", "csvlog") + } + + // Log in a timezone the OpenTelemetry Collector understands. + outParameters.Mandatory.Add("log_timezone", "UTC") + + // TODO(logs): Remove this call and do it in [postgres.NewParameters] regardless of the gate. + outParameters.Mandatory.Add("log_directory", fmt.Sprintf("%s/logs/postgres", postgres.DataStorage(inCluster))) + } +} + func NewConfigForPostgresPod(ctx context.Context, inCluster *v1beta1.PostgresCluster, - outParameters *postgres.ParameterSet, + inParameters *postgres.ParameterSet, ) *Config { config := NewConfig(inCluster.Spec.Instrumentation) @@ -30,7 +81,7 @@ func NewConfigForPostgresPod(ctx context.Context, EnablePatroniMetrics(ctx, inCluster, config) // Logging - EnablePostgresLogging(ctx, inCluster, config, outParameters) + EnablePostgresLogging(ctx, inCluster, inParameters, config) EnablePatroniLogging(ctx, inCluster, config) return config @@ -76,8 +127,8 @@ func postgresCSVNames(version int) string { func EnablePostgresLogging( ctx context.Context, inCluster *v1beta1.PostgresCluster, + inParameters *postgres.ParameterSet, outConfig *Config, - outParameters *postgres.ParameterSet, ) { var spec *v1beta1.InstrumentationLogsSpec if inCluster != nil && inCluster.Spec.Instrumentation != nil { @@ -85,42 +136,9 @@ func EnablePostgresLogging( } if OpenTelemetryLogsEnabled(ctx, inCluster) { - directory := postgres.LogDirectory() + directory := inParameters.Value("log_directory") version := inCluster.Spec.PostgresVersion - // https://www.postgresql.org/docs/current/runtime-config-logging.html - outParameters.Add("logging_collector", "on") - outParameters.Add("log_directory", directory) - - // PostgreSQL v8.3 adds support for CSV logging, and - // PostgreSQL v15 adds support for JSON logging. The latter is preferred - // because newlines are escaped as "\n", U+005C + U+006E. - if version < 15 { - outParameters.Add("log_destination", "csvlog") - } else { - outParameters.Add("log_destination", "jsonlog") - } - - // If retentionPeriod is set in the spec, use that value; otherwise, we want - // to use a reasonably short duration. Defaulting to 1 day. - retentionPeriod := metav1.Duration{Duration: 24 * time.Hour} - if spec != nil && spec.RetentionPeriod != nil { - retentionPeriod = spec.RetentionPeriod.AsDuration() - } - - // Rotate log files according to retention. - // - // The ".log" suffix is replaced by ".csv" for CSV log files, and - // the ".log" suffix is replaced by ".json" for JSON log files. - // - // https://www.postgresql.org/docs/current/runtime-config-logging.html - for k, v := range postgres.LogRotation(retentionPeriod, "postgresql-", ".log") { - outParameters.Add(k, v) - } - - // Log in a timezone that the OpenTelemetry Collector will understand. - outParameters.Add("log_timezone", "UTC") - // Keep track of what log records and files have been processed. // Use a subdirectory of the logs directory to stay within the same failure domain. // TODO(log-rotation): Create this directory during Collector startup. @@ -145,8 +163,8 @@ func EnablePostgresLogging( // The 2nd through 5th fields are optional, so match through to the 7th field. // This should do a decent job of not matching the middle of some SQL statement. // - // The number of fields has changed over the years, but the first few - // are always formatted the same way. + // The number of fields has changed over the years, but the first few are always formatted the same way. + // [PostgreSQLParameters] ensures the timezone is UTC. // // NOTE: This regexp is invoked in multi-line mode. https://go.dev/s/re2syntax "multiline": map[string]string{ diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 89f5f52255..a1a221bd6e 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -31,9 +31,11 @@ func TestEnablePostgresLogging(t *testing.T) { }`) config := NewConfig(nil) - params := postgres.NewParameterSet() + params := postgres.NewParameters() - EnablePostgresLogging(ctx, cluster, config, params) + // NOTE: This call is necessary only because the default "log_directory" is not absolute. + PostgreSQLParameters(ctx, cluster, ¶ms) + EnablePostgresLogging(ctx, cluster, params.Mandatory, config) result, err := config.ToYAML() assert.NilError(t, err) @@ -293,9 +295,11 @@ service: cluster.Spec.Instrumentation = testInstrumentationSpec() config := NewConfig(cluster.Spec.Instrumentation) - params := postgres.NewParameterSet() + params := postgres.NewParameters() - EnablePostgresLogging(ctx, cluster, config, params) + // NOTE: This call is necessary only because the default "log_directory" is not absolute. + PostgreSQLParameters(ctx, cluster, ¶ms) + EnablePostgresLogging(ctx, cluster, params.Mandatory, config) result, err := config.ToYAML() assert.NilError(t, err) diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index bbe141c0b4..c3264d0a32 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -339,7 +339,7 @@ func (r *Reconciler) Reconcile( ctx, cluster, clusterConfigMap, clusterReplicationSecret, rootCA, clusterPodService, instanceServiceAccount, instances, patroniLeaderService, primaryCertificate, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) } diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 97b035c04d..1bdac799df 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -534,6 +534,7 @@ func (r *Reconciler) reconcileInstanceSets( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) error { // Go through the observed instances and check if a primary has been determined. @@ -571,7 +572,7 @@ func (r *Reconciler) reconcileInstanceSets( patroniLeaderService, primaryCertificate, findAvailableInstanceNames(*set, instances, clusterVolumes), numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) if err == nil { @@ -1007,6 +1008,7 @@ func (r *Reconciler) scaleUpInstances( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) ([]*appsv1.StatefulSet, error) { log := logging.FromContext(ctx) @@ -1053,7 +1055,7 @@ func (r *Reconciler) scaleUpInstances( rootCA, clusterPodService, instanceServiceAccount, patroniLeaderService, primaryCertificate, instances[i], numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) } if err == nil { @@ -1085,6 +1087,7 @@ func (r *Reconciler) reconcileInstance( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) error { log := logging.FromContext(ctx).WithValues("instance", instance.Name) ctx = logging.NewContext(ctx, log) @@ -1128,7 +1131,7 @@ func (r *Reconciler) reconcileInstance( postgres.InstancePod( ctx, cluster, spec, primaryCertificate, replicationCertSecretProjection(clusterReplicationSecret), - postgresDataVolume, postgresWALVolume, tablespaceVolumes, + postgresDataVolume, postgresWALVolume, tablespaceVolumes, pgParameters, &instance.Spec.Template) if backupsSpecFound { diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index 4dd4a9d78a..3d254a96a4 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -25,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" @@ -129,6 +130,7 @@ func (*Reconciler) generatePostgresParameters( ctx context.Context, cluster *v1beta1.PostgresCluster, backupsSpecFound bool, ) *postgres.ParameterSet { builtin := postgres.NewParameters() + collector.PostgreSQLParameters(ctx, cluster, &builtin) pgaudit.PostgreSQLParameters(&builtin) pgbackrest.PostgreSQLParameters(cluster, &builtin, backupsSpecFound) pgmonitor.PostgreSQLParameters(ctx, cluster, &builtin) diff --git a/internal/postgres/config.go b/internal/postgres/config.go index e4a62ae798..6c6ddccdb1 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "math" + "path" "strings" corev1 "k8s.io/api/core/v1" @@ -121,13 +122,13 @@ func ConfigDirectory(cluster *v1beta1.PostgresCluster) string { // DataDirectory returns the absolute path to the "data_directory" of cluster. // - https://www.postgresql.org/docs/current/runtime-config-file-locations.html func DataDirectory(cluster *v1beta1.PostgresCluster) string { - return fmt.Sprintf("%s/pg%d", dataMountPath, cluster.Spec.PostgresVersion) + return fmt.Sprintf("%s/pg%d", DataStorage(cluster), cluster.Spec.PostgresVersion) } -// LogDirectory returns the absolute path to the "log_directory" of cluster. -// - https://www.postgresql.org/docs/current/runtime-config-logging.html -func LogDirectory() string { - return fmt.Sprintf("%s/logs/postgres", dataMountPath) +// DataStorage returns the absolute path to the disk where cluster stores its data. +// Use [DataDirectory] for the exact directory that Postgres uses. +func DataStorage(cluster *v1beta1.PostgresCluster) string { + return dataMountPath } // LogRotation returns parameters that rotate log files while keeping a minimum amount. @@ -354,12 +355,16 @@ done // PostgreSQL. func startupCommand( ctx context.Context, - cluster *v1beta1.PostgresCluster, instance *v1beta1.PostgresInstanceSetSpec, + cluster *v1beta1.PostgresCluster, + instance *v1beta1.PostgresInstanceSetSpec, + parameters *ParameterSet, ) []string { version := fmt.Sprint(cluster.Spec.PostgresVersion) + dataDir := DataDirectory(cluster) + logDir := parameters.Value("log_directory") walDir := WALDirectory(cluster, instance) - mkdirs := make([]string, 0, 7+len(instance.TablespaceVolumes)) + mkdirs := make([]string, 0, 9+len(instance.TablespaceVolumes)) mkdirs = append(mkdirs, `dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)"`) // If the user requests tablespaces, we want to make sure the directories exist with the correct owner and permissions. @@ -372,11 +377,24 @@ func startupCommand( } } + // Postgres creates "log_directory" but does *not* create any of its parent directories. + // Postgres omits the group-write S_IWGRP permission on the directory. Do both here while being + // careful to *not* touch "data_directory" contents until after `initdb` or Patroni bootstrap. + if path.IsAbs(logDir) && !strings.HasPrefix(logDir, dataDir) { + mkdirs = append(mkdirs, + `(`+shell.MakeDirectories(dataMountPath, logDir)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(logDir)+` ||:)"`, + ) + } else { + mkdirs = append(mkdirs, + `[[ ! -f `+shell.QuoteWord(path.Join(dataDir, "PG_VERSION"))+` ]] ||`, + `(`+shell.MakeDirectories(dataDir, logDir)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(path.Join(dataDir, logDir))+` ||:)"`, + ) + } + // These directories are outside "data_directory" and can be created. mkdirs = append(mkdirs, - `(`+shell.MakeDirectories(dataMountPath, LogDirectory())+`) ||`, - `halt "$(permissions `+shell.QuoteWord(LogDirectory())+` ||:)"`, - `(`+shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath)+`) ||`, `halt "$(permissions `+shell.QuoteWord(naming.PatroniPGDataLogPath)+` ||:)"`, diff --git a/internal/postgres/config_test.go b/internal/postgres/config_test.go index 762bd8a0b9..32490df26b 100644 --- a/internal/postgres/config_test.go +++ b/internal/postgres/config_test.go @@ -40,6 +40,13 @@ func TestDataDirectory(t *testing.T) { assert.Equal(t, DataDirectory(cluster), "/pgdata/pg12") } +func TestDataStorage(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = rand.IntN(20) + + assert.Equal(t, DataStorage(cluster), "/pgdata") +} + func TestLogRotation(t *testing.T) { t.Parallel() @@ -543,8 +550,10 @@ func TestStartupCommand(t *testing.T) { cluster.Spec.PostgresVersion = 13 instance := new(v1beta1.PostgresInstanceSetSpec) + parameters := NewParameters().Default + ctx := context.Background() - command := startupCommand(ctx, cluster, instance) + command := startupCommand(ctx, cluster, instance, parameters) // Expect a bash command with an inline script. assert.DeepEqual(t, command[:3], []string{"bash", "-ceu", "--"}) @@ -579,7 +588,7 @@ func TestStartupCommand(t *testing.T) { }, }, } - command := startupCommand(ctx, cluster, instance) + command := startupCommand(ctx, cluster, instance, parameters) assert.Assert(t, len(command) > 3) assert.Assert(t, strings.Contains(command[3], `cat << "EOF" > /tmp/pg_rewind_tde.sh #!/bin/sh diff --git a/internal/postgres/parameters.go b/internal/postgres/parameters.go index 6fb7b0d2f3..3590816b45 100644 --- a/internal/postgres/parameters.go +++ b/internal/postgres/parameters.go @@ -48,6 +48,17 @@ func NewParameters() Parameters { // - https://www.postgresql.org/docs/current/auth-password.html parameters.Default.Add("password_encryption", "scram-sha-256") + // Explicitly use Postgres' default log directory. This value is relative to the "data_directory" parameter. + // + // Controllers look at this parameter to grant group-write S_IWGRP on the directory. + // Postgres does not grant this permission on the directories it creates. + // + // TODO(logs): A better default would be *outside* the data directory. + // This parameter needs to be configurable and documented before the default can change. + // + // PostgreSQL must be reloaded when changing this parameter. + parameters.Default.Add("log_directory", "log") + // Pod "securityContext.fsGroup" ensures processes and filesystems agree on a GID; // use the same permissions for group and owner. // This allows every process in the pod to read Postgres log files. diff --git a/internal/postgres/parameters_test.go b/internal/postgres/parameters_test.go index ad8c6e90c9..54095750ff 100644 --- a/internal/postgres/parameters_test.go +++ b/internal/postgres/parameters_test.go @@ -28,6 +28,8 @@ func TestNewParameters(t *testing.T) { assert.DeepEqual(t, parameters.Default.AsMap(), map[string]string{ "jit": "off", + "log_directory": "log", + "password_encryption": "scram-sha-256", }) } diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index 81c6cc31fa..bd191549eb 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -62,6 +62,7 @@ func InstancePod(ctx context.Context, inClusterCertificates, inClientCertificates *corev1.SecretProjection, inDataVolume, inWALVolume *corev1.PersistentVolumeClaim, inTablespaceVolumes []*corev1.PersistentVolumeClaim, + inParameters *ParameterSet, outInstancePod *corev1.PodTemplateSpec, ) { certVolumeMount := corev1.VolumeMount{ @@ -191,7 +192,7 @@ func InstancePod(ctx context.Context, startup := corev1.Container{ Name: naming.ContainerPostgresStartup, - Command: startupCommand(ctx, inCluster, inInstanceSpec), + Command: startupCommand(ctx, inCluster, inInstanceSpec, inParameters), Env: Environment(inCluster), Image: container.Image, diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index 35a6ed900f..9ec45a96b1 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -69,6 +69,8 @@ func TestInstancePod(t *testing.T) { cluster.Spec.ImagePullPolicy = corev1.PullAlways cluster.Spec.PostgresVersion = 11 + parameters := NewParameters().Default + dataVolume := new(corev1.PersistentVolumeClaim) dataVolume.Name = "datavol" @@ -117,7 +119,7 @@ func TestInstancePod(t *testing.T) { // without WAL volume nor WAL volume spec pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, cmp.MarshalMatches(pod.Spec, ` containers: @@ -270,8 +272,9 @@ initContainers: bootstrap_dir="${postgres_data_directory}_bootstrap" [[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" && results 'bootstrap directory' "${bootstrap_dir}" dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)" - (mkdir -p '/pgdata/logs/postgres' && { chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs' || :; }) || - halt "$(permissions '/pgdata/logs/postgres' ||:)" + [[ ! -f '/pgdata/pg11/PG_VERSION' ]] || + (mkdir -p '/pgdata/pg11/log' && { chmod 0775 '/pgdata/pg11/log' || :; }) || + halt "$(permissions '/pgdata/pg11/log' ||:)" (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || halt "$(permissions '/pgdata/patroni/log' ||:)" (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || @@ -386,7 +389,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -487,7 +490,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, clusterWithConfig, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -524,7 +527,7 @@ volumes: t.Run("SidecarNotEnabled", func(t *testing.T) { InstancePod(ctx, cluster, sidecarInstance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Equal(t, len(pod.Spec.Containers), 2, "expected 2 containers in Pod") }) @@ -537,7 +540,7 @@ volumes: ctx := feature.NewContext(ctx, gate) InstancePod(ctx, cluster, sidecarInstance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Equal(t, len(pod.Spec.Containers), 3, "expected 3 containers in Pod") @@ -574,7 +577,7 @@ volumes: tablespaceVolumes := []*corev1.PersistentVolumeClaim{tablespaceVolume1, tablespaceVolume2} InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, parameters, pod) assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls @@ -612,7 +615,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -714,7 +717,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, cmp.MarshalContains(pod.Spec.Containers[0].VolumeMounts, ` @@ -744,7 +747,7 @@ volumes: annotated.Labels = map[string]string{"gg": "asdf"} InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, annotated) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, annotated) assert.Assert(t, cmp.MarshalContains(annotated.Spec.Volumes, ` - ephemeral: diff --git a/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml b/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml index 5360ef23fa..a768fa2abd 100644 --- a/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml +++ b/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml @@ -49,3 +49,18 @@ spec: replicas: 1 readyReplicas: 1 updatedReplicas: 1 + + catch: + - description: Read all log lines from job pods + podLogs: + selector: > + batch.kubernetes.io/job-name, + postgres-operator.crunchydata.com/cluster in (clone-one) + tail: -1 + + - description: Read all log lines from postgres pods + podLogs: + selector: > + postgres-operator.crunchydata.com/instance, + postgres-operator.crunchydata.com/cluster in (clone-one) + tail: -1 diff --git a/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml b/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml index 714227ab48..3b842f5ebc 100644 --- a/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml +++ b/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml @@ -62,9 +62,16 @@ spec: finished: true catch: - - - description: > - Read all log lines from the restore job pods + - description: Read all log lines from job pods + podLogs: + selector: > + batch.kubernetes.io/job-name, + postgres-operator.crunchydata.com/cluster in (original) + tail: -1 + + - description: Read all log lines from postgres pods podLogs: - selector: postgres-operator.crunchydata.com/pgbackrest-restore + selector: > + postgres-operator.crunchydata.com/instance, + postgres-operator.crunchydata.com/cluster in (original) tail: -1 From 3ac69fed66604aa1b8c37079b8d342e1d73b7134 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Fri, 22 Aug 2025 15:07:51 -0500 Subject: [PATCH 4/4] FIXUP: more comments around log dir creation --- internal/postgres/config.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/postgres/config.go b/internal/postgres/config.go index 6c6ddccdb1..e7e7c163de 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -378,14 +378,17 @@ func startupCommand( } // Postgres creates "log_directory" but does *not* create any of its parent directories. - // Postgres omits the group-write S_IWGRP permission on the directory. Do both here while being - // careful to *not* touch "data_directory" contents until after `initdb` or Patroni bootstrap. + // Postgres omits group-write S_IWGRP permission when creating the directory. + // + // Do both here while being careful to *not* touch "data_directory" contents until after + // `initdb` or Patroni bootstrap; those abort unless "data_directory" is entirely empty. if path.IsAbs(logDir) && !strings.HasPrefix(logDir, dataDir) { mkdirs = append(mkdirs, `(`+shell.MakeDirectories(dataMountPath, logDir)+`) ||`, `halt "$(permissions `+shell.QuoteWord(logDir)+` ||:)"`, ) } else { + // Postgres interprets "log_directory" relative to "data_directory" so do the same here. mkdirs = append(mkdirs, `[[ ! -f `+shell.QuoteWord(path.Join(dataDir, "PG_VERSION"))+` ]] ||`, `(`+shell.MakeDirectories(dataDir, logDir)+`) ||`,