diff --git a/.github/workflows/publish_ghcr_image.yaml b/.github/workflows/publish_ghcr_image.yaml deleted file mode 100644 index bbfd7e32..00000000 --- a/.github/workflows/publish_ghcr_image.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: Publish multiarch postgres-operator image on ghcr.io - -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -on: - push: - tags: - - '*' -jobs: - publish: - name: Build, test and push image - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - uses: actions/setup-go@v2 - with: - go-version: "^1.23.4" - - - name: Run unit tests - run: make deps mocks test - - - name: Define image name - id: image - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${GITHUB_REF/refs\/tags\//}" - echo "NAME=$IMAGE" >> $GITHUB_OUTPUT - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to GHCR - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push multiarch image to ghcr - uses: docker/build-push-action@v3 - with: - context: . - file: docker/Dockerfile - push: true - build-args: BASE_IMAGE=alpine:3.15 - tags: "${{ steps.image.outputs.NAME }}" - platforms: linux/amd64,linux/arm64 diff --git a/.github/workflows/run_e2e.yaml b/.github/workflows/run_e2e.yaml index 61fba3ff..3447ca48 100644 --- a/.github/workflows/run_e2e.yaml +++ b/.github/workflows/run_e2e.yaml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-go@v2 with: - go-version: "^1.23.4" + go-version: "^1.25.2" - name: Make dependencies run: make deps mocks - name: Code generation diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 9775f362..60bb46e3 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-go@v2 with: - go-version: "^1.23.4" + go-version: "^1.25.2" - name: Make dependencies run: make deps mocks - name: Compile diff --git a/Makefile b/Makefile index 44de20fe..200c12df 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ VERSION ?= $(shell git describe --tags --always --dirty) DIRS := cmd pkg PKG := `go list ./... 
| grep -v /vendor/` -BASE_IMAGE ?= rockylinux:9 +BASE_IMAGE ?= rockylinux/rockylinux:9 # BASE_IMAGE ?= rockylinux/rockylinux:10 PACKAGER ?= dnf BUILD ?= 1 @@ -89,7 +89,7 @@ docker-local: build/cybertec-pg-operator indocker-race: - docker run --rm -v "${GOPATH}":"${GOPATH}" -e GOPATH="${GOPATH}" -e RACE=1 -w ${PWD} golang:1.23.4 bash -c "make linux" + docker run --rm -v "${GOPATH}":"${GOPATH}" -e GOPATH="${GOPATH}" -e RACE=1 -w ${PWD} golang:1.25.2 bash -c "make linux" push: docker push "$(IMAGE):$(TAG)$(CDP_TAG)" diff --git a/README.md b/README.md index bed41381..09c39b61 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ This architecture forms the basis for a modern, highly available and scalable Po ## PostgreSQL features -- PostgreSQL 13 to 17 +- PostgreSQL 13 to 18 - Streaming replication via **Patroni** - Fully integrated backup & PITR with `pgBackRest` or `pg_basebackup` - Extensions like: @@ -73,7 +73,8 @@ This architecture forms the basis for a modern, highly available and scalable Po | Release | PostgreSQL | pgBackRest | Patroni | Kubernetes | Go | |---------|------------|------------|---------|------------|---------| | 0.8.0 | 13 - 17 | 2.53 | 4.0.2 | 1.21+ | 1.21.7 | -| 0.8.3 | 13 - 17 | 2.54-2 | 4.0.5 | 1.21+ | 1.22.12 | +| 0.8.3 | 13 - 17 | 2.54+ | 4.0.5 | 1.21+ | 1.22.12 | +| 0.9.0 | 13 - 18 | 2.56+ | 4.1.0 | 1.21+ | 1.25.2 | --- diff --git a/charts/postgres-cluster/values.yaml b/charts/postgres-cluster/values.yaml index 7a01fbb3..82aeff14 100644 --- a/charts/postgres-cluster/values.yaml +++ b/charts/postgres-cluster/values.yaml @@ -12,7 +12,7 @@ # my-annotation: value cluster: - dockerImage: docker.io/cybertecpostgresql/cybertec-pg-container:postgres-17.6-1 + dockerImage: docker.io/cybertecpostgresql/cybertec-pg-container:postgres-18.0-1 numberOfInstances: 2 postgresql: diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index bd84fc25..0f067177 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -170,7 +170,7 @@ spec: default: "13" target_major_version: type: string - default: "17" + default: "18" kubernetes: type: object properties: @@ -216,7 +216,10 @@ spec: default: true enable_readiness_probe: type: boolean - default: false + default: true + enable_liveness_probe: + type: boolean + default: false enable_sidecars: type: boolean default: true diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index a4c41ba5..e76fe341 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -91,7 +91,7 @@ configMajorVersionUpgrade: # minimal Postgres major version that will not automatically be upgraded minimal_major_version: "13" # target Postgres major version when upgrading clusters automatically - target_major_version: "17" + target_major_version: "18" configKubernetes: # list of additional capabilities for postgres container @@ -130,7 +130,7 @@ configKubernetes: # toggles PDB to set to MinAvailabe 0 or 1 enable_pod_disruption_budget: true # toogles readiness probe for database pods - enable_readiness_probe: false + enable_readiness_probe: true # enables sidecar containers to run alongside Spilo in the same pod enable_sidecars: true diff --git a/docker/build_operator.sh b/docker/build_operator.sh index 0aaba044..4d75d0c5 100644 --- a/docker/build_operator.sh +++ b/docker/build_operator.sh @@ -13,7 +13,7 @@ set -ex ( cd /tmp - wget -q 
"https://storage.googleapis.com/golang/go1.24.6.linux-${arch}.tar.gz" -O go.tar.gz + wget -q "https://go.dev/dl/go1.25.2.linux-${arch}.tar.gz" -O go.tar.gz tar -xf go.tar.gz mv go /usr/local ln -s /usr/local/go/bin/go /usr/bin/go diff --git a/docs/hugo/content/en/_index.md b/docs/hugo/content/en/_index.md index 9e88cf57..516df012 100644 --- a/docs/hugo/content/en/_index.md +++ b/docs/hugo/content/en/_index.md @@ -4,7 +4,7 @@ date: 2024-03-11T14:26:51+01:00 draft: false weight: 1 --- -Current Release: 0.8.3 (04.04.2025) [Release Notes](release_notes) +Current Release: 0.9.0 (31.10.2025) [Release Notes](release_notes) drawing @@ -21,8 +21,8 @@ The following features characterise our operator: - Reduction of downtime thanks to redundancy, pod anti-affinity, auto-failover and self-healing CPO is tested on the following platforms: -- Kubernetes: 1.21 - 1.28 -- Openshift: 4.8 - 4.13 +- Kubernetes: 1.21 - 1.32 +- Openshift: 4.8 - 4.19 - Rancher - AWS EKS - Azure AKS diff --git a/docs/hugo/content/en/crd/crd-operator-configurator.md b/docs/hugo/content/en/crd/crd-operator-configurator.md index 21d6cca5..f332fa0b 100644 --- a/docs/hugo/content/en/crd/crd-operator-configurator.md +++ b/docs/hugo/content/en/crd/crd-operator-configurator.md @@ -5,85 +5,245 @@ draft: false weight: 332 --- -| Name | Type | default | Description | -| -------------------------------- |:-------:| --------:| ------------------:| -| enable_crd_registration | boolean | true | | -| crd_categories | string | all | | -| enable_lazy_spilo_upgrade | boolean | false | | -| enable_pgversion_env_var | boolean | true | | -| enable_spilo_wal_path_combat | boolean | false | | -| etcd_host | string | | | -| kubernetes_use_configmaps | boolean | false | | -| docker_image | string | | | -| sidecars | list | | | -| enable_shm_volume | boolean | true | | -| workers | int | 8 | | -| max_instances | int | -1 | | -| min_instances | int | -1 | | -| resync_period | string | 30m | | -| repair_period | string | 5m | | -| set_memory_request_to_limit | boolean | false | | -| debug_logging | boolean | true | | -| enable_db_access | boolean | true | | -| spilo_privileged | boolean | false | | -| spilo_allow_privilege_escalation | boolean | true | | -| watched_namespace | string | * | | - -#### major-upgrade-specific - -| Name | Type | default | Description | -| ------------------------------------- |:-------:| --------:| ------------------:| -| major_version_upgrade_mode | string | off | | -| major_version_upgrade_team_allow_list | string | | | -| minimal_major_version | string | 9.6 | | -| target_major_version | string | 14 | | - -#### aws-specific - -| Name | Type | default | Description | -| ------------------------------------- |:-------:| --------:| ------------------:| -| wal_s3_bucket | string | | | -| log_s3_bucket | string | | | -| kube_iam_role | string | | | -| aws_region | string | | | -| additional_secret_mount | string | | | -| additional_secret_mount_path | string | | | -| enable_ebs_gp3_migration | boolean | | | -| enable_ebs_gp3_migration_max_size | int | | | - -#### logical-backup-specific - -| Name | Type | default | Description | -| ------------------------------------- |:-------:| --------:| ------------------:| -| logical_backup_docker_image | string | | | -| logical_backup_google_application_credentials | string | | | -| logical_backup_job_prefix | string | | | -| logical_backup_provider | string | | | -| logical_backup_s3_access_key_id | string | | | -| logical_backup_s3_bucket | string | | | -| logical_backup_s3_endpoint | 
string | | | -| logical_backup_s3_region | string | | | -| logical_backup_s3_secret_access_key | string | | | -| logical_backup_s3_sse | string | | | -| logical_backup_s3_retention_time | string | | | -| logical_backup_schedule | string | | (Cron-Syntax) | - -#### team-api-specific - -| Name | Type | default | Description | -| ------------------------------------- |:-------:| --------:| ------------------:| -| enable_teams_api | string | | | -| teams_api_url | string | | | -| teams_api_role_configuration | string | | | -| enable_team_superuser | boolean | | | -| team_admin_role | boolean | | | -| enable_admin_role_for_users | boolean | | | -| pam_role_name | string | | | -| pam_configuration | string | | | -| protected_role_names | list | | | -| postgres_superuser_teams | string | | | -| role_deletion_suffix | string | | | -| enable_team_member_deprecation | boolean | | | -| enable_postgres_team_crd | boolean | | | -| enable_postgres_team_crd_superusers | boolean | | | -| enable_team_id_clustername_prefix | boolean | | | \ No newline at end of file +#### CRD for kind OperatorConfiguration + +| Name | Type | required | Description | +| ----------- |:-------------------:| ---------:| ------------------:| +| apiVersion | string | true | cpo.opensource.cybertec.at/v1 | +| kind | string | true | OperatorConfiguration | +| metadata | object | true | | +| [configuration](#configuration) | object | true | | + +--- + +#### configuration + +| Name | Type | default | Description | +| ------------------------------------------------- |:-------------:| ---------:| ------------------:| +| [kubernetes](#kubernetes) | object | | | +| [users](#users) | object | | | +| [connection_pooler](#connection_pooler) | object | | | +| [logging_rest_api](#logging_rest_api) | object | | | +| [load_balancer](#load_balancer) | object | | | +| [major_version_upgrade](#major_version_upgrade) | object | | | +| [teams_api](#teams_api) | object | | | +| [timeouts](#timeouts) | object | | | +| [debug](#debug) | object | | | +| [logical_backup](#logical_backup) | object | | | +| [aws_or_gcp](#aws_or_gcp) | object | | | +| [sidecars](#sidecars) | list | | Each item is of type [Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#container-v1-core) | +| docker_image | string | | | +| enable_crd_registration | boolean | `true` | True, Operator updates the crd itself | +| enable_crd_validation | boolean | `true ` | deprecated | +| enable_lazy_spilo_upgrade | boolean | `false ` | If true, update statefulset with new images without rolling update. | +| enable_pgversion_env_var | boolean | `true ` | Set PGVersion via ENV-Label. Changes can create issues | +| enable_shm_volume | boolean | `true` | True adds tmpfs-Volume to remove shm memory-limitations | +| enable_spilo_wal_path_compat | boolean | `false` | | +| enable_team_id_clustername_prefix | boolean | false | | +| etcd_host | string | | Only required if the Kubernetes-native approach is not used. | +| kubernetes_use_configmaps | boolean | true | Recommended! Uses configmaps for Patroni instead of entrypoints. | +| max_instances | int | -1 | Maximum number of Postgres pods per cluster. | +| min_instances | int | -1 | Minimal number of Postgres pods per cluster. 
|
+| postgres_pod_resources | string | true | |
+| repair_period | string | 5m | Period between subsequent repair requests |
+| resync_period | string | 30m | Period between subsequent resync requests |
+| set_memory_request_to_limit | boolean | false | |
+| workers | int | 8 | Number of workers in the operator that simultaneously process tasks such as create/update/delete clusters |
+
+{{< back >}}
+
+---
+
+#### kubernetes
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| -----------:| ------------------:|
+| cluster_labels | map | | a map of key-value pairs adding labels |
+| cluster_domain | string | `cluster.local` | DNS domain used inside the K8s-Cluster. Used by the operator to communicate with clusters |
+| cluster_name_label | string | `cluster.cpo.opensource.cybertec.at/name` | Label to identify all resources of a cluster |
+| container_readonly_root_filesystem | boolean | `false` | Enables ReadOnlyRootFilesystem in the SecurityContext of the pods |
+| enable_cross_namespace_secret | boolean | `false` | Enables the storage of secrets in another namespace, provided that it is activated. The namespace is defined in the cluster manifest. |
+| enable_init_containers | boolean | `true` | Allows the definition of init containers in the cluster manifest |
+| enable_pod_antiaffinity | boolean | `true` | The pod anti-affinity rules are applied when activated. |
+| enable_pod_disruption_budget | boolean | `true` | Pod Disruption Budgets (PDB) are generated for clusters when activated. |
+| enable_readiness_probe | boolean | `true` | Operator adds a readiness probe for resources when enabled |
+| enable_liveness_probe | boolean | `false` | Operator adds a liveness probe for resources when enabled |
+| enable_sidecars | boolean | `true` | Allows the definition of sidecars in the cluster manifest |
+| inherited_labels | list | | Labels added to each resource |
+| master_pod_move_timeout | string | `20m` | Timeout for waiting for a primary pod to switch to another Kubernetes node. |
+| oauth_token_secret_name | string | `postgresql-operator` | |
+| pdb_name_format | string | `postgres-{cluster}-pdb` | Naming scheme for generated pod disruption budgets (PDB) |
+| pod_management_policy | string | `ordered_ready` | Pod management policy for the [statefulset](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) |
+| pod_antiaffinity_topology_key | string | `kubernetes.io/hostname` | Defines the anti-affinity [topology key](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/) |
+| pod_antiaffinity_preferred_during_scheduling | boolean | `false` | |
+| pod_role_label | string | `member.cpo.opensource.cybertec.at/role` | Defines the label for the pod role |
+| pod_service_account_definition | string | `''` | |
+| pod_service_account_name | string | `cpo-pod` | ServiceAccount used for all cluster pods |
+| pod_service_account_role_binding_definition | string | `''` | |
+| pod_terminate_grace_period | string | `5m` | |
+| secret_name_template | string | `{username}.{cluster}.credentials.{tprkind}.{tprgroup}` | |
+| share_pgsocket_with_sidecars | boolean | `false` | |
+| spilo_allow_privilege_escalation | boolean | `false` | Defines the privilege-escalation attribute in the SecurityContext |
+| spilo_privileged | boolean | `false` | Defines the privileged attribute in the SecurityContext |
+| storage_resize_mode | string | `pvc` | |
+| watched_namespace | string | `*` | Operator watches for objects in the defined namespace. `*` means all namespaces, `` means only the operator namespace, `NAMESPACE_NAME` means a specific namespace |
+
+{{< back >}}
+
+---
+
+#### users
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ---------:| ------------------:|
+| enable_password_rotation | boolean | `false` | Enables password rotation by the operator for all login roles excluding the database owner |
+| password_rotation_interval | int | `90` | Interval in days |
+| password_rotation_user_retention | int | `180` | To avoid a constantly growing number of new users due to password rotation, the operator deletes the created users after a certain number of days. The number can be configured with this parameter. However, the operator checks whether the retention policy is at least twice as long as the rotation interval and updates it to this minimum if this is not the case. |
+| replication_username | string | `cpo_replication` | Name for the replication user |
+| super_username | string | `postgres` | Name for the superuser. Changes can create issues |
+
+{{< back >}}
+
+---
+
+#### connection_pooler
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| -------------:| ----------------------:|
+| connection_pooler_default_cpu_request | string | `500m` | CPU request for the pod |
+| connection_pooler_default_cpu_limit | string | `1` | CPU limit for the pod |
+| connection_pooler_default_memory_request | string | `100Mi` | Memory request for the pod |
+| connection_pooler_default_memory_limit | string | `100Mi` | Memory limit for the pod |
+| connection_pooler_image | string | | Container image |
+| connection_pooler_max_db_connections | int | `60` | Max connections between DB and pooler. Divided by the `connection_pooler_number_of_instances` |
+| connection_pooler_mode | string | `transaction` | Pooler mode |
+| connection_pooler_number_of_instances | int | `2` | Number of instances |
+| connection_pooler_schema | string | `pooler` | Schema to create needed objects like the lookup function |
+| connection_pooler_user | string | `pooler` | Database user for the pooler |
+
+{{< back >}}
+
+---
+
+#### logging_rest_api
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ---------:| ------------------:|
+| api_port | int | `8080` | REST API port |
+| cluster_history_entries | int | `1000` | Number of lines used to store cluster logs. |
+| ring_log_lines | int | `100` | Number of entries in the ring log |
+
+{{< back >}}
+
+---
+
+#### load_balancer
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ----------------:| ------------------:|
+| db_hosted_zone | string | `db.example.com` | DNS definition for the cluster DNS |
+| enable_master_load_balancer | boolean | `false` | Creates a loadbalancer service for the primary pod, if enabled |
+| enable_master_pooler_load_balancer | boolean | `false` | Creates a loadbalancer service for the primary pooler, if enabled |
+| enable_replica_load_balancer | boolean | `false` | Creates a loadbalancer service for the replica pods, if enabled |
+| enable_replica_pooler_load_balancer | boolean | `false` | Creates a loadbalancer service for the replica pooler, if enabled |
+| external_traffic_policy | string | `Cluster` | Defines the traffic policy for loadbalancers. Possible values: `Cluster`, `Local` |
+| master_dns_name_format | string | `{cluster}.{namespace}.{hostedzone}` | DNS format for the primary loadbalancer |
+| replica_dns_name_format | string | `{cluster}-repl.{namespace}.{hostedzone}` | DNS format for the replica loadbalancer |
+| master_legacy_dns_name_format | string | `{cluster}.{team}.{hostedzone}` | deprecated |
+| replica_legacy_dns_name_format | string | `{cluster}-repl.{team}.{hostedzone}` | deprecated |
+
+{{< back >}}
+
+---
+
+#### major_version_upgrade
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ---------:| ------------------:|
+| major_version_upgrade_mode | string | `manual` | Mode for major upgrades. `manual`: the upgrade is triggered by the `PGVERSION` definition in the cluster manifest, `full`: the upgrade is triggered by the operator based on `target_major_version`, `off`: the operator never triggers an upgrade. |
+| minimal_major_version | string | `13` | The minimum Postgres major version that will not be upgraded automatically when `major_version_upgrade_mode = full` |
+| target_major_version | string | `18` | Target Postgres major version when the upgrade is triggered automatically via `major_version_upgrade_mode = full` |
+
+{{< back >}}
+
+---
+
+#### teams_api
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ----------:| ------------------:|
+| enable_team_superuser | boolean | `false` | |
+| teams_api_url | string | `https://teams.example.com/api/` | |
+| team_admin_role | string | `admin` | |
+| enable_postgres_team_crd_superusers | boolean | `false` | |
+| protected_role_names | list | | |
+| pam_role_name | string | `cpo_pam` | |
+| pam_configuration | string | `https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees` | |
+| team_api_role_configuration | map | | a map of key-value pairs for role configuration |
+| enable_teams_api | boolean | `false` | |
+| enable_team_member_deprecation | boolean | `false` | |
+| enable_admin_role_for_users | boolean | `false` | |
+| role_deletion_suffix | string | `_deleted` | |
+| enable_postgres_team_crd | boolean | `false` | |
+
+{{< back >}}
+
+---
+
+#### timeouts
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| ---------:| ------------------:|
+| patroni_api_check_interval | string | `1s` | |
+| patroni_api_check_timeout | string | `5s` | |
+| pod_deletion_wait_timeout | string | `10m` | |
+| pod_label_wait_timeout | string | `10m` | |
+| ready_wait_interval | string | `4s` | |
+| ready_wait_timeout | string | `30s` | |
+| resource_check_interval | string | `3s` | |
+| resource_check_timeout | string | `10m` | |
+
+{{< back >}}
+
+---
+
+#### debug
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:--------------:| ---------:| ------------------:|
+| debug_logging | boolean | `true` | Enables debug logs |
+| enable_database_access | boolean | `true` | Allows the operator to connect to the database (to create users and for other actions) |
+
+{{< back >}}
+
+---
+
+#### logical_backup (deprecated)
+
+| Name | Type | default | Description |
+| --------------------------------------------- |:-------------:| -------------:| ------------------:|
+| logical_backup_docker_image | string | | deprecated |
+| logical_backup_job_prefix | string | `logical-backup-` | deprecated |
+| logical_backup_provider | string | `s3` | deprecated |
+| logical_backup_s3_bucket | 
string | `my-bucket-url` | deprecated | +| logical_backup_s3_sse | string | `AES256` | deprecated | +| logical_backup_schedule | string | `30 00 * * *` | deprecated | + +{{< back >}} + +--- + +#### aws_or_gcp + +| Name | Type | default | Description | +| --------------------------------------------- |:-------------:| --------------:| ------------------:| +| additional_secret_mount_path | string | `/meta/credentials` | | +| aws_region | string | `eu-central-1` | | +| enable_ebs_gp3_migration | boolean | `false` | | +| enable_ebs_gp3_migration_max_size | int | `1000` | | + +{{< back >}} + +--- \ No newline at end of file diff --git a/docs/hugo/content/en/crd/crd-postgresql.md b/docs/hugo/content/en/crd/crd-postgresql.md index b972b7f4..062f20b6 100644 --- a/docs/hugo/content/en/crd/crd-postgresql.md +++ b/docs/hugo/content/en/crd/crd-postgresql.md @@ -8,7 +8,7 @@ weight: 331 | Name | Type | required | Description | | ----------- |:--------------:| ---------:| ------------------:| -| apiVersion | string | true | acid.zalando.do/v1 | +| apiVersion | string | true | cpo.opensource.cybertec.at/v1 | | kind | string | true | | | metadata | object | true | | | [spec](#spec) | object | true | | diff --git a/docs/hugo/content/en/first_cluster/_index.md b/docs/hugo/content/en/first_cluster/_index.md index 0caa9e04..742b32ae 100644 --- a/docs/hugo/content/en/first_cluster/_index.md +++ b/docs/hugo/content/en/first_cluster/_index.md @@ -16,10 +16,10 @@ kind: postgresql metadata: name: cluster-1 spec: - dockerImage: "docker.io/cybertecpostgresql/cybertec-pg-container:postgres-17.6-1" + dockerImage: "docker.io/cybertecpostgresql/cybertec-pg-container:postgres-18.0-1" numberOfInstances: 1 postgresql: - version: "17" + version: "18" resources: limits: cpu: 500m @@ -30,7 +30,7 @@ spec: volume: size: 5Gi ``` -Based on this Manifest the Operator will deploy a single-Node-Cluster based on the defined dockerImage and start the included Postgres-17-Server. +Based on this Manifest the Operator will deploy a single-Node-Cluster based on the defined dockerImage and start the included Postgres-18-Server. Also created is a volume based on your default-storage Class. The Ressource-Definiton means, that we reserve a half cpu and a half GB Memory for this Cluster with the same Definition as limit. After some seconds we should see, that the operator creates our cluster based on the declared definitions. diff --git a/docs/hugo/content/en/ha_cluster/_index.md b/docs/hugo/content/en/ha_cluster/_index.md index f1192977..8c1ad5e5 100644 --- a/docs/hugo/content/en/ha_cluster/_index.md +++ b/docs/hugo/content/en/ha_cluster/_index.md @@ -30,10 +30,10 @@ kind: postgresql metadata: name: cluster-1 spec: - dockerImage: "docker.io/cybertecpostgresql/cybertec-pg-container:postgres-17.6-1" + dockerImage: "docker.io/cybertecpostgresql/cybertec-pg-container:postgres-18.0-1" numberOfInstances: 2 postgresql: - version: "17" + version: "18" resources: limits: cpu: 500m diff --git a/docs/hugo/content/en/installation/configuration_operator.md b/docs/hugo/content/en/installation/configuration_operator.md index 189aea39..7ed3a73e 100644 --- a/docs/hugo/content/en/installation/configuration_operator.md +++ b/docs/hugo/content/en/installation/configuration_operator.md @@ -5,80 +5,148 @@ draft: false weight: 503 --- -Users who are already used to working with PostgreSQL from Baremetal or VMs are already familiar with the need for various files to configure PostgreSQL. These include -- postgresql.conf -- pg_hba.conf -- ... 
+# Configuring the PostgreSQL Operator -Although these files are available in the container, direct modification is not planned. As part of the declarative mode of operation of the operator, these files are defined via the operator. The modifying intervention within the container also represents a contradiction to the immutability of the container. +The PostgreSQL Operator is configured based on the custom resource type **`OperatorConfiguration`**. +This resource allows you to control the behaviour of the operator in detail and adapt it to individual requirements. -For these reasons, the operator provides a way to make adjustments to the various files, from PostgreSQL to Patroni. +The supplied **Helm chart** already contains a **default configuration** that is suitable for most use cases. +These default values cover typical operating requirements and enable a quick start without additional adjustments. -We differentiate between two main objects in the cluster manifest: -- [`postgresql`](documentation/how-to-use/configuration/#postgresql) with the child objects `version` and `parameters` -- [`patroni`](documentation/how-to-use/configuration/#patroni) with objects for the `pg_hab`, `slots` and much more - -## postgresql +The assignment to OperatorConfiguration is based on the ENV section in the operator deployment: +```yaml + containers: + - name: postgres-operator + env: + - name: POSTGRES_OPERATOR_CONFIGURATION_OBJECT + value: postgresql-operator-configuration +``` -The `postgresql `object consists of the following elements: -- `version` - allows you to select the major version of PostgreSQL used. -- `parameters`- enables the postgresql.conf to be changed +In addition, the `OperatorConfiguration` offers a wide range of options for specifically influencing the behaviour of the operator. +Among other things, the following aspects can be configured: +## Behaviour for major upgrades +The operator allows you to configure the behaviour during major upgrades using the following fields: +```yaml + major_version_upgrade_mode: manual + minimal_major_version: '13' + target_major_version: '18' ``` -spec: - postgresql: - parameters: - shared_preload_libraries: 'pg_stat_statements,pgnodemx, timescaledb' - shared_buffers: '512MB' - version: '16' -``` +### Explanation of parameters -Any known PostgreSQL parameter from postgresql.conf can be entered here and will be delivered by the operator to all nodes of the cluster accordingly. +- **major_version_upgrade_mode** +Controls how major upgrades are performed: -You can find more information about the parameters in the [PostgreSQL documentation](https://www.postgresql.org/docs/) + - `manual`: The upgrade is triggered **manually** via the cluster manifest. + - `off`: Upgrades via Operator are disabled. + - `full`: The operator compares the version in the manifest with the configured `minimal_major_version`. If the version is lower, the operator starts an **automatic upgrade** to the configured `target_major_version`. -## patroni +- **minimal_major_version** +Specifies The minimum Postgres major version that will not be automatically upgraded (only relevant in `full` mode). -The patroni object contains numerous options for customising the patroni-setu, and the pg_hba.conf is also configured here. A complete list of all available elements can be found here. +- **target_major_version** +The version to which the automatic upgrade should update (only relevant in `full` mode). 
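
As a minimal sketch of how `manual` mode is typically used (assuming the cluster manifest fields shown in the other chapters of this documentation), raising `postgresql.version` in the cluster manifest is what triggers the upgrade:

```yaml
apiVersion: cpo.opensource.cybertec.at/v1
kind: postgresql
metadata:
  name: cluster-1
spec:
  postgresql:
    # raising the major version (e.g. from "17") triggers the upgrade in manual mode
    version: "18"
```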
+ -The most important elements include -- `pg_hba` - pg_hba.conf -- `slots` -- `synchronous_mode` - enables synchronous mode in the cluster. The default is set to `false` -- `maximum_lag_on_failover` - Specifies the maximum lag so that the pod is still considered healthy in the event of a failover. -- `failsafe_mode` Allows you to cancel the downgrading of the leader if all cluster members can be reached via the Patroni Rest Api. -You can find more information on this in the [Patroni documentation](https://patroni-readthedocs-io.translate.goog/en/master/dcs_failsafe_mode.html?_x_tr_sl=auto&_x_tr_tl=de&_x_tr_hl=de&_x_tr_pto=wapp) +## Readiness and liveness probes +The operator allows health checks to be configured using the following fields: -### pg_hba +```yaml +enable_readiness_probe: true +enable_liveness_probe: false +``` +### Explanation of parameters -The pg_hba.conf contains all defined authentication rules for PostgreSQL. +- **enable_readiness_probe** +Specifies whether the readiness probe definition should be added to the container. -When customising this configuration, it is important that the entire version of pg_hba is written to the manifest. -The current configuration can be read out in the database using table pg_hba_file_rules ;. +- **enable_liveness_probe** +Specifies whether the liveness probe definition should be added to the container. -Further information can be found in the [PostgreSQL documentation](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) +## SecurityContext settings +The operator allows the configuration of the **SecurityContext** for the PostgreSQL containers via the following fields: +```yaml + spilo_privileged: false + spilo_allow_privilege_escalation: false + container_readonly_root_filesystem: true +``` +### Explanation of parameters + +- **spilo_privileged** +Specifies whether the container should run in **privileged mode**. + - `true`: Privileged mode enabled + - `false`: Privileged mode disabled (recommended for production environments) + +- **container_readonly_root_filesystem** +Enables a **read-only root filesystem** to increase security. + - `true`: Root filesystem is read-only + - `false`: Write access to root filesystem allowed + +- **spilo_allow_privilege_escalation** +Specifies whether the container is allowed to **escalate privileges**. + - `true`: Privilege escalation allowed + - `false`: Privilege escalation disabled (security-friendly) + +## Connection pooler configuration +The operator enables detailed configuration of the **connection pooler** via the following fields: + +```yaml + connection_pooler: + connection_pooler_user: pooler + connection_pooler_default_memory_request: 100Mi + connection_pooler_max_db_connections: 60 + connection_pooler_default_cpu_request: 500m + connection_pooler_image: 'docker.io/cybertecpostgresql/cybertec-pg-container:pgbouncer-1.24.1-4' + connection_pooler_default_memory_limit: 100Mi + connection_pooler_default_cpu_limit: '1' + connection_pooler_schema: pooler + connection_pooler_number_of_instances: 2 + connection_pooler_mode: transaction +``` +### Explanation of parameters -### slots +- **connection_pooler_user** +Username for the pooler role in the database. -When using user-defined slots, for example for the use of CDC using Debezium, there are problems when interacting with Patroni, as the slot and its current status are not automatically synchronised to the replicas. 
+- **connection_pooler_default_memory_request / connection_pooler_default_cpu_request** +Resource request for the pooler container (memory and CPU). -In the event of a failover, the client cannot start replication as both the entire slot and the information about the data that has already been synchronised are missing. +- **connection_pooler_default_memory_limit / connection_pooler_default_cpu_limit** +Resource limits for the pooler container (memory and CPU). -To resolve this problem, slots must be defined in the cluster manifest rather than in PostgreSQL. +- **connection_pooler_max_db_connections** +Maximum number of simultaneous connections that the pooler creates to PostgreSQL. -``` -spec: - patroni: - slots: - cdc-example: - database: app_db - plugin: pgoutput - type: logical -``` -This example creates a logical replication slot with the name `cdc-example` within the `app_db` database and uses the `pgoutput` plugin for the slot. +- **connection_pooler_image** +Container image for the pooler. + +- **connection_pooler_schema** +Database schema used by the pooler. + +- **connection_pooler_number_of_instances** +Number of pooler pods. +- **connection_pooler_mode** +Operating mode of the pooler: +- `transaction`: Pooler manages connections per transaction +- `session`: Pooler manages connections per session$ +- `statement`: Pooler manages connections per statement + + +## Debug options +*(Here you can explain which debug features are available and how they are activated)* +```yaml + debug: + debug_logging: true + enable_database_access: true +``` +### Explanation of parameters -{{< hint type=Info >}}Slots are only synchronised from the leader/standby leader to the replicas. This means that using the slots read-only on the replicas will cause a problem in the event of a failover.{{< /hint >}} +- **debug_logging** +Enables or disables debug output in the operator log +- **enable_database_access** +Defines whether the operator is permitted to access the database, for example to create users or databases +The complete structure and description of all available parameters is documented in the [OperatorConfiguration](crd/crd-operator-configurator). 
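
Putting the options above together, a minimal `OperatorConfiguration` could look like the following sketch; the field placement follows the tables in the CRD reference linked above, and all values are illustrative rather than recommendations:

```yaml
apiVersion: cpo.opensource.cybertec.at/v1
kind: OperatorConfiguration
metadata:
  name: postgresql-operator-configuration
configuration:
  kubernetes:
    enable_readiness_probe: true
    enable_liveness_probe: false
    spilo_privileged: false
    spilo_allow_privilege_escalation: false
    container_readonly_root_filesystem: true
  major_version_upgrade:
    major_version_upgrade_mode: manual
    minimal_major_version: '13'
    target_major_version: '18'
  debug:
    debug_logging: true
    enable_database_access: true
```

The object name corresponds to the value of the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` environment variable shown at the top of this page.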
diff --git a/docs/hugo/content/en/release_notes/_index.md b/docs/hugo/content/en/release_notes/_index.md index b62aa777..c7b8e1e1 100644 --- a/docs/hugo/content/en/release_notes/_index.md +++ b/docs/hugo/content/en/release_notes/_index.md @@ -4,6 +4,34 @@ date: 2024-03-11T14:26:51+01:00 draft: false weight: 2500 --- +### 0.9.0 + +#### Features +- Adding PG18 +- Liveness check added (can be activated via Operator Configuration) +- ReadOnlyRootFilesystem added as SecurityContext (can be activated via Operator Configuration) + +#### Changes +- Add OwnerReference for Statefulsets +- Optimisations for major upgrade +- Statefulsert receives OwnerReference to the CR + +#### Fixes +- cert-Handling for Multisite +- pgBackRest Restore with TDE +- Fix for Monitoring pgBackRest +- Dependency updates and several small changes + +#### Notification of upcoming deprecation +- PG13 has reached its EoL + +#### Supported Versions + +- PG: 13 - 18 +- Patroni: 4.1.0 +- pgBackRest: 2.57.0 +- Kubernetes: 1.21 - 1.32 +- Openshift: 4.8 - 4.18 ### 0.8.3 diff --git a/go.mod b/go.mod index a42fca6a..3ea09fbe 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,9 @@ module github.com/cybertec-postgresql/cybertec-pg-operator -go 1.23.4 +go 1.25.2 require ( + github.com/Masterminds/semver v1.5.0 github.com/aws/aws-sdk-go v1.55.8 github.com/golang/mock v1.6.0 github.com/lib/pq v1.10.4 @@ -64,7 +65,7 @@ require ( go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.19.0 // indirect - golang.org/x/mod v0.26.0 // indirect + golang.org/x/mod v0.27.0 // indirect golang.org/x/net v0.43.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sync v0.16.0 // indirect @@ -72,7 +73,7 @@ require ( golang.org/x/term v0.34.0 // indirect golang.org/x/text v0.28.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.35.0 // indirect + golang.org/x/tools v0.36.0 // indirect google.golang.org/protobuf v1.36.6 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 290b39f3..f45d4cba 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= +github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -272,8 +274,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= -golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= +golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= +golang.org/x/mod v0.27.0/go.mod 
h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -350,8 +352,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/tools/go/expect v0.1.0-deprecated h1:jY2C5HGYR5lqex3gEniOQL0r7Dq5+VGVgY1nudX5lXY= golang.org/x/tools/go/expect v0.1.0-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY= golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated h1:1h2MnaIAIXISqTFKdENegdpAgUXz6NrPEsbIeWaBRvM= diff --git a/kubectl-pg/go.mod b/kubectl-pg/go.mod index 4038c649..73c2f6e5 100644 --- a/kubectl-pg/go.mod +++ b/kubectl-pg/go.mod @@ -1,6 +1,6 @@ module github.com/cybertec-postgresql/cybertec-pg-operator/kubectl-pg -go 1.24.6 +go 1.25.2 require ( github.com/cybertec-postgresql/cybertec-pg-operator v0.8.2 diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 195946f9..820963ce 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -53,7 +53,7 @@ data: # enable_pod_disruption_budget: "true" # enable_postgres_team_crd: "false" # enable_postgres_team_crd_superusers: "false" - enable_readiness_probe: "false" + enable_readiness_probe: "true" enable_replica_load_balancer: "false" enable_replica_pooler_load_balancer: "false" # enable_shm_volume: "true" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index f24a603f..34495c32 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -168,7 +168,7 @@ spec: default: "13" target_major_version: type: string - default: "17" + default: "18" kubernetes: type: object properties: @@ -213,6 +213,9 @@ spec: type: boolean default: true enable_readiness_probe: + type: boolean + default: true + enable_liveness_probe: type: boolean default: false enable_sidecars: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index d4991dfb..547cf50a 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -40,7 +40,7 @@ configuration: # major_version_upgrade_team_allow_list: # - acid minimal_major_version: "13" - target_major_version: "17" + target_major_version: "18" kubernetes: # additional_pod_capabilities: # - "SYS_NICE" @@ -60,7 +60,7 @@ configuration: enable_init_containers: true enable_pod_antiaffinity: false enable_pod_disruption_budget: true - enable_readiness_probe: false + enable_readiness_probe: true enable_sidecars: true # ignored_annotations: # - k8s.v1.cni.cncf.io/network-status diff --git 
a/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go b/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go index a7603677..e58703d0 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/operator_configuration_type.go @@ -50,7 +50,7 @@ type MajorVersionUpgradeConfiguration struct { MajorVersionUpgradeMode string `json:"major_version_upgrade_mode" default:"off"` // off - no actions, manual - manifest triggers action, full - manifest and minimal version violation trigger upgrade MajorVersionUpgradeTeamAllowList []string `json:"major_version_upgrade_team_allow_list,omitempty"` MinimalMajorVersion string `json:"minimal_major_version" default:"13"` - TargetMajorVersion string `json:"target_major_version" default:"17"` + TargetMajorVersion string `json:"target_major_version" default:"18"` } // KubernetesMetaConfiguration defines k8s conf required for all Postgres clusters and the operator itself @@ -62,6 +62,7 @@ type KubernetesMetaConfiguration struct { PodTerminateGracePeriod Duration `json:"pod_terminate_grace_period,omitempty"` SpiloPrivileged bool `json:"spilo_privileged,omitempty"` SpiloAllowPrivilegeEscalation *bool `json:"spilo_allow_privilege_escalation,omitempty"` + ReadOnlyRootFilesystem *bool `json:"container_readonly_root_filesystem" default:"false"` SpiloRunAsUser *int64 `json:"spilo_runasuser,omitempty"` SpiloRunAsGroup *int64 `json:"spilo_runasgroup,omitempty"` SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"` @@ -102,6 +103,7 @@ type KubernetesMetaConfiguration struct { PodManagementPolicy string `json:"pod_management_policy,omitempty"` PersistentVolumeClaimRetentionPolicy map[string]string `json:"persistent_volume_claim_retention_policy,omitempty"` EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` + EnableLivenessProbe bool `json:"enable_liveness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` } diff --git a/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go b/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go index 9e9ba2f0..21908897 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/postgresql_type.go @@ -216,11 +216,13 @@ type CloneDescription struct { // Sidecar defines a container to be run in the same pod as the Postgres container. 
type Sidecar struct { - *Resources `json:"resources,omitempty"` - Name string `json:"name,omitempty"` - DockerImage string `json:"image,omitempty"` - Ports []v1.ContainerPort `json:"ports,omitempty"` - Env []v1.EnvVar `json:"env,omitempty"` + *Resources `json:"resources,omitempty"` + Name string `json:"name,omitempty"` + DockerImage string `json:"image,omitempty"` + Ports []v1.ContainerPort `json:"ports,omitempty"` + Env []v1.EnvVar `json:"env,omitempty"` + SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"` + VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"` } // UserFlags defines flags (such as superuser, nologin) that could be assigned to individual users diff --git a/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go b/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go index 7bb49d49..001cf58e 100644 --- a/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go +++ b/pkg/apis/cpo.opensource.cybertec.at/v1/zz_generated.deepcopy.go @@ -241,6 +241,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = new(bool) **out = **in } + if in.ReadOnlyRootFilesystem != nil { + in, out := &in.ReadOnlyRootFilesystem, &out.ReadOnlyRootFilesystem + *out = new(bool) + **out = **in + } if in.SpiloRunAsUser != nil { in, out := &in.SpiloRunAsUser, &out.SpiloRunAsUser *out = new(int64) @@ -1502,6 +1507,18 @@ func (in *Sidecar) DeepCopyInto(out *Sidecar) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(corev1.SecurityContext) + (*in).DeepCopyInto(*out) + } + if in.VolumeMounts != nil { + in, out := &in.VolumeMounts, &out.VolumeMounts + *out = make([]corev1.VolumeMount, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index bc6f09d1..4b0b3cb2 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -49,6 +49,11 @@ var ( patroniObjectSuffixes = []string{"leader", "config", "sync", "failover"} ) +const ( + crAPIVersion = "cpo.opensource.cybertec.at/v1" + crKind = "postgresql" +) + // Config contains operator-wide clients and configuration used from a cluster. TODO: remove struct duplication. type Config struct { OpConfig config.Config @@ -176,6 +181,23 @@ func (c *Cluster) clusterNamespace() string { return c.ObjectMeta.Namespace } +func (c *Cluster) createOwnerReference() []metav1.OwnerReference { + if c.APIVersion == "" || c.Kind == "" { + c.APIVersion = crAPIVersion + c.Kind = crKind + } + return []metav1.OwnerReference{ + { + APIVersion: c.APIVersion, + Kind: c.Kind, + Name: c.Name, + UID: c.UID, + Controller: util.True(), + BlockOwnerDeletion: util.False(), + }, + } +} + func (c *Cluster) teamName() string { // TODO: check Teams API for the actual name (in case the user passes an integer Id). 
return c.Spec.TeamID @@ -651,6 +673,15 @@ func (c *Cluster) compareStatefulSetWith(oldSts, newSts *appsv1.StatefulSet) *co return &compareStatefulsetResult{match: match, reasons: reasons, rollingUpdate: needsRollUpdate, replace: needsReplace} } +func (c *Cluster) compareOwnerReferenceFromStatefulSet(current *appsv1.StatefulSet) bool { + for _, ref := range current.OwnerReferences { + if ref.UID == c.UID && ref.Controller != nil && *ref.Controller { + return true + } + } + return false +} + type containerCondition func(a, b v1.Container) bool type containerCheck struct { @@ -677,6 +708,8 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe func(a, b v1.Container) bool { return a.Name != b.Name }), newCheck("new statefulset %s's %s (index %d) readiness probe does not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), + newCheck("new statefulset %s's %s (index %d) liveness probe does not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.LivenessProbe, b.LivenessProbe) }), newCheck("new statefulset %s's %s (index %d) ports do not match the current one", func(a, b v1.Container) bool { return !comparePorts(a.Ports, b.Ports) }), newCheck("new statefulset %s's %s (index %d) resources do not match the current ones", diff --git a/pkg/cluster/database.go b/pkg/cluster/database.go index 36c68175..b4428c78 100644 --- a/pkg/cluster/database.go +++ b/pkg/cluster/database.go @@ -113,6 +113,11 @@ const ( last_entry_timestamp TIMESTAMP; record_count INT; BEGIN + IF pg_is_in_recovery() THEN + RAISE NOTICE 'Skipping pgbackrest_info update: running on a replica.'; + RETURN; + END IF; + SELECT COUNT(*) INTO record_count FROM exporter.pgbackrestbackupinfo; @@ -122,11 +127,12 @@ const ( ORDER BY data_time DESC LIMIT 1; - IF last_entry_timestamp < NOW() - INTERVAL '5 minutes' THEN - DELETE FROM exporter.pgbackrestbackupinfo; - ELSE + IF last_entry_timestamp >= NOW() - INTERVAL '5 minutes' THEN + RAISE NOTICE 'Skipping pgbackrest_info update: data does not need an update (last update at %).', last_entry_timestamp; RETURN; END IF; + + DELETE FROM exporter.pgbackrestbackupinfo; END IF; EXECUTE format( @@ -134,6 +140,7 @@ const ( ); END; $$ LANGUAGE plpgsql; + ` ) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 7fb4d1d3..8b516893 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -188,7 +188,7 @@ func (c *Cluster) enforceMinResourceLimits(resources *v1.ResourceRequirements) e if isSmaller { msg = fmt.Sprintf("defined CPU limit %s for %q container is below required minimum %s and will be increased", cpuLimit.String(), constants.PostgresContainerName, minCPULimit) - c.logger.Warningf(msg) + c.logger.Warningf("%s", msg) c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "ResourceLimits", msg) resources.Limits[v1.ResourceCPU], _ = resource.ParseQuantity(minCPULimit) } @@ -205,7 +205,7 @@ func (c *Cluster) enforceMinResourceLimits(resources *v1.ResourceRequirements) e if isSmaller { msg = fmt.Sprintf("defined memory limit %s for %q container is below required minimum %s and will be increased", memoryLimit.String(), constants.PostgresContainerName, minMemoryLimit) - c.logger.Warningf(msg) + c.logger.Warningf("%s", msg) c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "ResourceLimits", msg) resources.Limits[v1.ResourceMemory], _ = resource.ParseQuantity(minMemoryLimit) } @@ -335,10 +335,11 @@ func generateSpiloJSONConfiguration(pg 
*cpov1.PostgresqlParam, patroni *cpov1.Pa pgVersion, err := strconv.Atoi(pg.PgVersion) if err != nil { fmt.Println("Problem to get PGVersion:", err) - pgVersion = 16 + pgVersion = 18 } if pgVersion > 14 { config.Bootstrap.Initdb = []interface{}{map[string]string{"auth-host": "scram-sha-256"}, + "data-checksums", map[string]string{"auth-local": "trust"}, map[string]string{"encoding": "UTF8"}, map[string]string{"locale": "en_US.UTF-8"}, @@ -679,6 +680,7 @@ func generateContainer( volumeMounts []v1.VolumeMount, privilegedMode bool, privilegeEscalationMode *bool, + readOnlyRootFilesystem *bool, additionalPodCapabilities *v1.Capabilities, ) *v1.Container { return &v1.Container{ @@ -705,7 +707,7 @@ func generateContainer( SecurityContext: &v1.SecurityContext{ AllowPrivilegeEscalation: privilegeEscalationMode, Privileged: &privilegedMode, - ReadOnlyRootFilesystem: util.False(), + ReadOnlyRootFilesystem: readOnlyRootFilesystem, Capabilities: additionalPodCapabilities, }, } @@ -738,7 +740,7 @@ func (c *Cluster) generateSidecarContainers(sidecars []cpov1.Sidecar, } // adds common fields to sidecars -func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, superUserName string, credentialsSecretName string, logger *logrus.Entry) []v1.Container { +func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, superUserName string, credentialsSecretName string, logger *logrus.Entry, privilegedMode bool, privilegeEscalationMode *bool, additionalPodCapabilities *v1.Capabilities) []v1.Container { result := []v1.Container{} for _, container := range in { @@ -779,6 +781,7 @@ func patchSidecarContainers(in []v1.Container, volumeMounts []v1.VolumeMount, su }, } container.Env = appendEnvVars(env, container.Env...) + result = append(result, container) } @@ -875,6 +878,19 @@ func (c *Cluster) generatePodTemplate( podSpec.PriorityClassName = priorityClassName } + if c.Postgresql.Spec.Monitoring != nil { + addEmptyDirVolume(&podSpec, "exporter-tmp", "postgres-exporter", "/tmp") + } + + if c.OpConfig.ReadOnlyRootFilesystem != nil && *c.OpConfig.ReadOnlyRootFilesystem && !isRepoHost { + addRunVolume(&podSpec, "postgres-run", "postgres", "/run") + addEmptyDirVolume(&podSpec, "postgres-tmp", "postgres", "/tmp") + } + + if c.OpConfig.ReadOnlyRootFilesystem != nil && *c.OpConfig.ReadOnlyRootFilesystem && isRepoHost { + addEmptyDirVolume(&podSpec, "pgbackrest-tmp", "pgbackrest", "/tmp") + } + if sharePgSocketWithSidecars != nil && *sharePgSocketWithSidecars { addVarRunVolume(&podSpec) } @@ -990,6 +1006,19 @@ func (c *Cluster) generateSpiloPodEnvVars( Name: "HUMAN_ROLE", Value: c.OpConfig.PamRoleName, }, + // NSS WRAPPER + { + Name: "LD_PRELOAD", + Value: "/usr/lib64/libnss_wrapper.so", + }, + { + Name: "NSS_WRAPPER_PASSWD", + Value: "/tmp/nss_wrapper/passwd", + }, + { + Name: "NSS_WRAPPER_GROUP", + Value: "/tmp/nss_wrapper/group", + }, } if c.OpConfig.EnableSpiloWalPathCompat { @@ -1000,6 +1029,10 @@ func (c *Cluster) generateSpiloPodEnvVars( envVars = append(envVars, v1.EnvVar{Name: "USE_PGBACKREST", Value: "true"}) } + if c.OpConfig.ReadOnlyRootFilesystem != nil && *c.OpConfig.ReadOnlyRootFilesystem { + envVars = append(envVars, v1.EnvVar{Name: "HOME", Value: "/home/postgres"}) + } + if spec.TDE != nil && spec.TDE.Enable { envVars = append(envVars, v1.EnvVar{Name: "TDE", Value: "true"}) // envVars = append(envVars, v1.EnvVar{Name: "PGENCRKEYCMD", Value: "/tmp/tde.sh"}) @@ -1245,6 +1278,8 @@ func getSidecarContainer(sidecar cpov1.Sidecar, index int, resources *v1.Resourc Resources: 
*resources, Env: sidecar.Env, Ports: sidecar.Ports, + SecurityContext: sidecar.SecurityContext, + VolumeMounts: sidecar.VolumeMounts, } } @@ -1294,6 +1329,23 @@ func generateSpiloReadinessProbe() *v1.Probe { } } +func generatePatroniLivenessProbe() *v1.Probe { + return &v1.Probe{ + FailureThreshold: 6, + ProbeHandler: v1.ProbeHandler{ + HTTPGet: &v1.HTTPGetAction{ + Path: "/liveness", + Port: intstr.IntOrString{IntVal: patroni.ApiPort}, + Scheme: v1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 10, + TimeoutSeconds: 5, + SuccessThreshold: 1, + } +} + func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.StatefulSet, error) { var ( @@ -1424,6 +1476,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu } additionalVolumes = append(additionalVolumes, tlsVolumes...) } + repo_host_mode := false // Add this envVar so that it is not added to the pgbackrest initcontainer if specHasPgbackrestPVCRepo(spec) { @@ -1448,6 +1501,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu volumeMounts, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + c.OpConfig.Resources.ReadOnlyRootFilesystem, generateCapabilities(c.OpConfig.AdditionalPodCapabilities), ) @@ -1455,6 +1509,10 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu if c.OpConfig.EnableReadinessProbe { spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() } + // + if c.OpConfig.EnableLivenessProbe { + spiloContainer.LivenessProbe = generatePatroniLivenessProbe() + } // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} @@ -1510,7 +1568,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu containerName, containerName) } - sidecarContainers = patchSidecarContainers(sidecarContainers, volumeMounts, c.OpConfig.SuperUsername, c.credentialSecretName(c.OpConfig.SuperUsername), c.logger) + sidecarContainers = patchSidecarContainers(sidecarContainers, volumeMounts, c.OpConfig.SuperUsername, c.credentialSecretName(c.OpConfig.SuperUsername), c.logger, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, generateCapabilities(c.OpConfig.AdditionalPodCapabilities)) tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) topologySpreadConstraintsSpec := topologySpreadConstraints(&spec.TopologySpreadConstraints) @@ -1519,7 +1577,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu podAnnotations := c.generatePodAnnotations(spec) if spec.GetBackup().Pgbackrest != nil { - initContainers = append(initContainers, c.generatePgbackrestRestoreContainer(spec, repo_host_mode, volumeMounts, resourceRequirements)) + initContainers = append(initContainers, c.generatePgbackrestRestoreContainer(spec, repo_host_mode, volumeMounts, resourceRequirements, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, generateCapabilities(c.OpConfig.AdditionalPodCapabilities))) additionalVolumes = append(additionalVolumes, c.generatePgbackrestConfigVolume(spec.Backup.Pgbackrest, false)) @@ -1602,10 +1660,11 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu statefulSet := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: c.statefulSetName(), - Namespace: c.Namespace, - Labels: c.labelsSetWithType(true, TYPE_POSTGRESQL), - Annotations: 
c.AnnotationsToPropagate(c.annotationsSet(nil)), + Name: c.statefulSetName(), + Namespace: c.Namespace, + Labels: c.labelsSetWithType(true, TYPE_POSTGRESQL), + Annotations: c.AnnotationsToPropagate(c.annotationsSet(nil)), + OwnerReferences: c.createOwnerReference(), }, Spec: appsv1.StatefulSetSpec{ Replicas: &numberOfInstances, @@ -1622,7 +1681,7 @@ func (c *Cluster) generateStatefulSet(spec *cpov1.PostgresSpec) (*appsv1.Statefu return statefulSet, nil } -func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, repo_host_mode bool, volumeMounts []v1.VolumeMount, resourceRequirements *v1.ResourceRequirements) v1.Container { +func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, repo_host_mode bool, volumeMounts []v1.VolumeMount, resourceRequirements *v1.ResourceRequirements, privilegedMode bool, privilegeEscalationMode *bool, additionalPodCapabilities *v1.Capabilities) v1.Container { isOptional := true pgbackrestRestoreEnvVars := []v1.EnvVar{ { @@ -1695,6 +1754,18 @@ func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, r }, ) } + if spec.TDE != nil && spec.TDE.Enable { + pgbackrestRestoreEnvVars = append(pgbackrestRestoreEnvVars, v1.EnvVar{Name: "TDE", Value: "true"}) + pgbackrestRestoreEnvVars = append(pgbackrestRestoreEnvVars, v1.EnvVar{Name: "TDE_KEY", ValueFrom: &v1.EnvVarSource{ + SecretKeyRef: &v1.SecretKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: c.getTDESecretName(), + }, + Key: "key", + }, + }, + }) + } return v1.Container{ Name: constants.RestoreContainerName, @@ -1702,6 +1773,12 @@ func (c *Cluster) generatePgbackrestRestoreContainer(spec *cpov1.PostgresSpec, r Env: pgbackrestRestoreEnvVars, VolumeMounts: volumeMounts, Resources: *resourceRequirements, + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: privilegeEscalationMode, + Privileged: &privilegedMode, + ReadOnlyRootFilesystem: util.True(), + Capabilities: additionalPodCapabilities, + }, } } @@ -1760,6 +1837,7 @@ func (c *Cluster) generateRepoHostStatefulSet(spec *cpov1.PostgresSpec) (*appsv1 volumeMounts, c.OpConfig.Resources.SpiloPrivileged, c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + c.OpConfig.Resources.ReadOnlyRootFilesystem, generateCapabilities(c.OpConfig.AdditionalPodCapabilities), ) @@ -1844,10 +1922,11 @@ func (c *Cluster) generateRepoHostStatefulSet(spec *cpov1.PostgresSpec) (*appsv1 statefulSet := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: c.getPgbackrestRepoHostName(), - Namespace: c.Namespace, - Labels: repoHostLabels, - Annotations: c.AnnotationsToPropagate(c.annotationsSet(nil)), + Name: c.getPgbackrestRepoHostName(), + Namespace: c.Namespace, + Labels: repoHostLabels, + Annotations: c.AnnotationsToPropagate(c.annotationsSet(nil)), + OwnerReferences: c.createOwnerReference(), }, Spec: appsv1.StatefulSetSpec{ Replicas: &numberOfInstances, @@ -2163,6 +2242,55 @@ func addShmVolume(podSpec *v1.PodSpec) { podSpec.Volumes = volumes } +func addEmptyDirVolume(podSpec *v1.PodSpec, volumeName string, containerName string, path string) { + vol := v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } + podSpec.Volumes = append(podSpec.Volumes, vol) + + mount := v1.VolumeMount{ + Name: vol.Name, + MountPath: path, + } + + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == containerName { + podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts, mount) + } + } + if vol.Name == 
"postgres-tmp" && len(podSpec.InitContainers) > 0 { + for i := range podSpec.InitContainers { + if podSpec.InitContainers[i].Name == "pgbackrest-restore" { + podSpec.InitContainers[i].VolumeMounts = append(podSpec.InitContainers[i].VolumeMounts, mount) + } + } + } +} + +func addRunVolume(podSpec *v1.PodSpec, volumeName string, containerName string, path string) { + vol := v1.Volume{ + Name: volumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } + podSpec.Volumes = append(podSpec.Volumes, vol) + + mount := v1.VolumeMount{ + Name: vol.Name, + MountPath: path, + } + + for i := range podSpec.Containers { + if podSpec.Containers[i].Name == containerName { + podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts, mount) + } + } +} + func addVarRunVolume(podSpec *v1.PodSpec) { volumes := append(podSpec.Volumes, v1.Volume{ Name: "postgresql-run", @@ -2730,6 +2858,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { []v1.VolumeMount{}, c.OpConfig.SpiloPrivileged, // use same value as for normal DB pods c.OpConfig.SpiloAllowPrivilegeEscalation, + util.False(), nil, ) @@ -3256,9 +3385,13 @@ func (c *Cluster) generatePgbackrestJob(backup *cpov1.Pgbackrest, repo *cpov1.Re []v1.VolumeMount{}, c.OpConfig.SpiloPrivileged, // use same value as for normal DB pods c.OpConfig.SpiloAllowPrivilegeEscalation, + c.OpConfig.Resources.ReadOnlyRootFilesystem, nil, ) + // Patch securityContext - readOnlyRootFilesystem + pgbackrestContainer.SecurityContext.ReadOnlyRootFilesystem = util.True() + podAffinityTerm := v1.PodAffinityTerm{ LabelSelector: c.roleLabelsSelector(Master), TopologyKey: "kubernetes.io/hostname", diff --git a/pkg/cluster/majorversionupgrade.go b/pkg/cluster/majorversionupgrade.go index 697c97c4..61edf190 100644 --- a/pkg/cluster/majorversionupgrade.go +++ b/pkg/cluster/majorversionupgrade.go @@ -1,12 +1,17 @@ package cluster import ( + "context" + "encoding/json" "fmt" "strings" + "github.com/Masterminds/semver" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/spec" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) // VersionMap Map of version numbers @@ -19,6 +24,11 @@ var VersionMap = map[string]int{ "18": 180000, } +const ( + majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success" + majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure" +) + // IsBiggerPostgresVersion Compare two Postgres version numbers func IsBiggerPostgresVersion(old string, new string) bool { oldN := VersionMap[old] @@ -35,7 +45,7 @@ func (c *Cluster) GetDesiredMajorVersionAsInt() int { func (c *Cluster) GetDesiredMajorVersion() string { if c.Config.OpConfig.MajorVersionUpgradeMode == "full" { - // e.g. current is 10, minimal is 11 allowing 11 to 15 clusters, everything below is upgraded + // e.g. 
current is 13, minimal is 13 allowing 13 to 17 clusters, everything below is upgraded if IsBiggerPostgresVersion(c.Spec.PgVersion, c.Config.OpConfig.MinimalMajorVersion) { c.logger.Infof("overwriting configured major version %s to %s", c.Spec.PgVersion, c.Config.OpConfig.TargetMajorVersion) return c.Config.OpConfig.TargetMajorVersion @@ -55,6 +65,63 @@ func (c *Cluster) isUpgradeAllowedForTeam(owningTeam string) bool { return util.SliceContains(allowedTeams, owningTeam) } +func (c *Cluster) annotatePostgresResource(isSuccess bool) error { + annotations := make(map[string]string) + currentTime := metav1.Now().Format("2006-01-02T15:04:05Z") + if isSuccess { + annotations[majorVersionUpgradeSuccessAnnotation] = currentTime + } else { + annotations[majorVersionUpgradeFailureAnnotation] = currentTime + } + patchData, err := metaAnnotationsPatch(annotations) + if err != nil { + c.logger.Errorf("could not form patch for %s postgresql resource: %v", c.Name, err) + return err + } + _, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.MergePatchType, patchData, metav1.PatchOptions{}) + if err != nil { + c.logger.Errorf("failed to patch annotations to postgresql resource: %v", err) + return err + } + return nil +} + +func (c *Cluster) removeFailuresAnnotation() error { + annotationToRemove := []map[string]string{ + { + "op": "remove", + "path": fmt.Sprintf("/metadata/annotations/%s", majorVersionUpgradeFailureAnnotation), + }, + } + removePatch, err := json.Marshal(annotationToRemove) + if err != nil { + c.logger.Errorf("could not form removal patch for %s postgresql resource: %v", c.Name, err) + return err + } + _, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.JSONPatchType, removePatch, metav1.PatchOptions{}) + if err != nil { + c.logger.Errorf("failed to remove annotations from postgresql resource: %v", err) + return err + } + return nil +} + +func (c *Cluster) criticalOperationLabel(pods []v1.Pod, value *string) error { + metadataReq := map[string]map[string]map[string]*string{"metadata": {"labels": {"critical-operation": value}}} + + patchReq, err := json.Marshal(metadataReq) + if err != nil { + return fmt.Errorf("could not marshal ObjectMeta: %v", err) + } + for _, pod := range pods { + _, err = c.KubeClient.Pods(c.Namespace).Patch(context.TODO(), pod.Name, types.StrategicMergePatchType, patchReq, metav1.PatchOptions{}) + if err != nil { + return err + } + } + return nil +} + /* Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off"). @@ -70,6 +137,10 @@ func (c *Cluster) majorVersionUpgrade() error { desiredVersion := c.GetDesiredMajorVersionAsInt() if c.currentMajorVersion >= desiredVersion { + if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it + c.removeFailuresAnnotation() + c.logger.Infof("removing failure annotation as the cluster is already up to date") + } c.logger.Infof("cluster version up to date. 
current: %d, min desired: %d", c.currentMajorVersion, desiredVersion) return nil } @@ -80,12 +151,19 @@ func (c *Cluster) majorVersionUpgrade() error { } allRunning := true + isStandbyCluster := false var masterPod *v1.Pod for i, pod := range pods { ps, _ := c.patroni.GetMemberData(&pod) + if ps.Role == "standby_leader" { + isStandbyCluster = true + c.currentMajorVersion = ps.ServerVersion + break + } + if ps.State != "running" { allRunning = false c.logger.Infof("identified non running pod, potentially skipping major version upgrade") @@ -97,37 +175,120 @@ func (c *Cluster) majorVersionUpgrade() error { } } + if masterPod == nil { + c.logger.Infof("no master in the cluster, skipping major version upgrade") + return nil + } + + // Recheck version with newest data from Patroni + if c.currentMajorVersion >= desiredVersion { + if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it + c.removeFailuresAnnotation() + c.logger.Infof("removing failure annotation as the cluster is already up to date") + } + c.logger.Infof("recheck cluster version is already up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion) + return nil + } else if isStandbyCluster { + c.logger.Warnf("skipping major version upgrade for %s/%s standby cluster. Re-deploy standby cluster with the required Postgres version specified", c.Namespace, c.Name) + return nil + } + + if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { + c.logger.Infof("last major upgrade failed, skipping upgrade") + return nil + } + + if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) { + c.logger.Infof("skipping major version upgrade, not in maintenance window") + return nil + } + + members, err := c.patroni.GetClusterMembers(masterPod) + if err != nil { + c.logger.Error("could not get cluster members data from Patroni API, skipping major version upgrade") + return err + } + patroniData, err := c.patroni.GetMemberData(masterPod) + if err != nil { + c.logger.Error("could not get members data from Patroni API, skipping major version upgrade") + return err + } + patroniVersion := patroniData.Patroni.Version + parts := strings.Split(patroniVersion, ".") + if len(parts) > 3 { + patroniVersion = strings.Join(parts[:3], ".") + } + patroniVer, err := semver.NewVersion(patroniVersion) + + if err != nil { + c.logger.Error("error parsing Patroni version") + patroniVer, _ = semver.NewVersion("3.0.4") + } + verConstraint, _ := semver.NewConstraint(">= 3.0.4") + checkStreaming, _ := verConstraint.Validate(patroniVer) + + for _, member := range members { + if PostgresRole(member.Role) == Leader { + continue + } + if checkStreaming && member.State != "streaming" { + c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name) + return nil + } + if member.Lag > 16*1024*1024 { + c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name) + return nil + } + } + + isUpgradeSuccess := true numberOfPods := len(pods) - if allRunning && masterPod != nil { + if allRunning { c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion) if c.currentMajorVersion < desiredVersion { + defer func() error { + if err = c.criticalOperationLabel(pods, nil); err != nil { + return fmt.Errorf("failed to remove critical-operation label: %s", err) + } + return nil + }() + val := "true" + if err = 
c.criticalOperationLabel(pods, &val); err != nil { + return fmt.Errorf("failed to assign critical-operation label: %s", err) + } + podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name} c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods) - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods) upgradeCommand := fmt.Sprintf("set -o pipefail && /usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods) - c.logger.Debugf("checking if the spilo image runs with root or non-root (check for user id=0)") + c.logger.Debug("checking if the spilo image runs with root or non-root (check for user id=0)") resultIdCheck, errIdCheck := c.ExecCommand(podName, "/bin/bash", "-c", "/usr/bin/id -u") if errIdCheck != nil { - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "Checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck) } resultIdCheck = strings.TrimSuffix(resultIdCheck, "\n") - var result string + var result, scriptErrMsg string if resultIdCheck != "0" { - c.logger.Infof("User id was identified as: %s, hence default user is non-root already", resultIdCheck) + c.logger.Infof("user id was identified as: %s, hence default user is non-root already", resultIdCheck) result, err = c.ExecCommand(podName, "/bin/bash", "-c", upgradeCommand) + scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log") } else { - c.logger.Infof("User id was identified as: %s, using su to reach the postgres user", resultIdCheck) + c.logger.Infof("user id was identified as: %s, using su to reach the postgres user", resultIdCheck) result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand) + scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log") } if err != nil { - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "Upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err) - return err + isUpgradeSuccess = false + c.annotatePostgresResource(isUpgradeSuccess) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg) + return fmt.Errorf("%s", scriptErrMsg) } - c.logger.Infof("upgrade action triggered and command completed: %s", result[:100]) - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion) + c.annotatePostgresResource(isUpgradeSuccess) + c.logger.Infof("upgrade action triggered and command completed: %s", result[:100]) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion) } } diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 9a336121..e242dd74 100644 --- 
a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -2,6 +2,7 @@ package cluster import ( "context" + "encoding/json" "fmt" "strconv" "strings" @@ -92,6 +93,12 @@ func (c *Cluster) createStatefulSet() (*appsv1.StatefulSet, error) { }, }, Env: c.generateMonitoringEnvVars(), + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + Privileged: &c.OpConfig.Resources.SpiloPrivileged, + ReadOnlyRootFilesystem: util.True(), + Capabilities: generateCapabilities(c.OpConfig.AdditionalPodCapabilities), + }, } c.Spec.Sidecars = append(c.Spec.Sidecars, *sidecar) //populate the sidecar spec so that the sidecar is automatically created } @@ -166,6 +173,39 @@ func (c *Cluster) preScaleDown(newStatefulSet *appsv1.StatefulSet) error { return nil } +func (c *Cluster) patchOwnerReference(sts *appsv1.StatefulSet) (*appsv1.StatefulSet, error) { + c.setProcessName("patching ownerReference") + + if sts == nil { + return nil, fmt.Errorf("there is no statefulset in the cluster") + } + + statefulSetName := util.NameFromMeta(sts.ObjectMeta) + ownerRefs := c.createOwnerReference() + + patchData, err := json.Marshal(map[string]interface{}{ + "metadata": map[string]interface{}{ + "ownerReferences": ownerRefs, + }, + }) + if err != nil { + return nil, fmt.Errorf("could not marshal patch for ownerReference: %w", err) + } + + patched, err := c.KubeClient.StatefulSets(sts.Namespace).Patch( + context.TODO(), + sts.Name, + types.MergePatchType, + patchData, + metav1.PatchOptions{}, + ) + if err != nil { + return nil, fmt.Errorf("could not patch ownerReference for StatefulSet %q: %w", statefulSetName, err) + } + + return patched, nil +} + func (c *Cluster) updateStatefulSet(newStatefulSet *appsv1.StatefulSet) error { c.setProcessName("updating statefulset") if c.Statefulset == nil { diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index a12117d9..0da424ad 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -514,6 +514,12 @@ func (c *Cluster) syncStatefulSet() error { }, }, Env: c.generateMonitoringEnvVars(), + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: c.OpConfig.Resources.SpiloAllowPrivilegeEscalation, + Privileged: &c.OpConfig.Resources.SpiloPrivileged, + ReadOnlyRootFilesystem: util.True(), + Capabilities: generateCapabilities(c.OpConfig.AdditionalPodCapabilities), + }, } c.Spec.Sidecars = append(c.Spec.Sidecars, *sidecar) //populate the sidecar spec so that the sidecar is automatically created } @@ -576,6 +582,15 @@ func (c *Cluster) syncStatefulSet() error { c.applyRestoreStatefulSetSyncOverrides(desiredSts, c.Statefulset) } + // Check if OwnerReference still up to date - if not patch it + if !c.compareOwnerReferenceFromStatefulSet(c.Statefulset) { + patched, err := c.patchOwnerReference(c.Statefulset) + if err != nil { + return err + } + c.Statefulset = patched + } + cmp := c.compareStatefulSetWith(c.Statefulset, desiredSts) if !cmp.match { if cmp.rollingUpdate { @@ -1093,7 +1108,7 @@ func (c *Cluster) updateSecret( } if updateSecret { - c.logger.Debugln(updateSecretMsg) + c.logger.Debugln("%s", updateSecretMsg) if _, err = c.KubeClient.Secrets(secret.Namespace).Update(context.TODO(), secret, metav1.UpdateOptions{}); err != nil { return fmt.Errorf("could not update secret %s: %v", secretName, err) } @@ -1596,6 +1611,15 @@ func (c *Cluster) syncPgbackrestRepoHostConfig(spec *cpov1.PostgresSpec) error { return fmt.Errorf("could not generate pgbackrest repo-host statefulset: %v", err) } + // Check if 
OwnerReference still up to date - if not patch it + if !c.compareOwnerReferenceFromStatefulSet(curSts) { + patched, err := c.patchOwnerReference(curSts) + if err != nil { + return err + } + curSts = patched + } + cmp := c.compareStatefulSetWith(curSts, desiredSts) if !cmp.match { c.logStatefulSetChanges(curSts, desiredSts, false, cmp.reasons) diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index c04ca223..51271955 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ -269,9 +269,9 @@ func (c *Cluster) getTeamMembers(teamID string) ([]string, error) { if teamID == "" { msg := "no teamId specified" if c.OpConfig.EnableTeamIdClusternamePrefix { - return nil, fmt.Errorf(msg) + return nil, fmt.Errorf("%s", msg) } - c.logger.Warnf(msg) + c.logger.Warnf("%s", msg) return nil, nil } @@ -759,3 +759,24 @@ func (c *Cluster) multisiteEnabled() bool { } return enable != nil && *enable } + +func isInMaintenanceWindow(specMaintenanceWindows []cpov1.MaintenanceWindow) bool { + if len(specMaintenanceWindows) == 0 { + return true + } + now := time.Now() + currentDay := now.Weekday() + currentTime := now.Format("15:04") + + for _, window := range specMaintenanceWindows { + startTime := window.StartTime.Format("15:04") + endTime := window.EndTime.Format("15:04") + + if window.Everyday || window.Weekday == currentDay { + if currentTime >= startTime && currentTime <= endTime { + return true + } + } + } + return false +} diff --git a/pkg/cluster/volumes.go b/pkg/cluster/volumes.go index b9631f40..295103c0 100644 --- a/pkg/cluster/volumes.go +++ b/pkg/cluster/volumes.go @@ -135,7 +135,7 @@ func (c *Cluster) syncUnderlyingEBSVolume() error { if len(errors) > 0 { for _, s := range errors { - c.logger.Warningf(s) + c.logger.Warningf("%s", s) } } return nil diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 8e55b498..931eacb0 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -10,7 +10,6 @@ import ( "sync" "time" - "github.com/sirupsen/logrus" cpov1 "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/apis/cpo.opensource.cybertec.at/v1" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/apiserver" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/cluster" @@ -22,6 +21,7 @@ import ( "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/constants" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/k8sutil" "github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util/ringlog" + "github.com/sirupsen/logrus" v1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index a3ebd883..a8f31a8c 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -63,7 +63,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *cpov1.OperatorConfigura result.MajorVersionUpgradeMode = util.Coalesce(fromCRD.MajorVersionUpgrade.MajorVersionUpgradeMode, "off") result.MajorVersionUpgradeTeamAllowList = fromCRD.MajorVersionUpgrade.MajorVersionUpgradeTeamAllowList result.MinimalMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.MinimalMajorVersion, "13") - result.TargetMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.TargetMajorVersion, "17") + result.TargetMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.TargetMajorVersion, "18") // kubernetes config result.CustomPodAnnotations = fromCRD.Kubernetes.CustomPodAnnotations @@ -75,6 +75,7 @@ func (c *Controller) 
importConfigurationFromCRD(fromCRD *cpov1.OperatorConfigura result.PodTerminateGracePeriod = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod), "5m") result.SpiloPrivileged = fromCRD.Kubernetes.SpiloPrivileged result.SpiloAllowPrivilegeEscalation = util.CoalesceBool(fromCRD.Kubernetes.SpiloAllowPrivilegeEscalation, util.True()) + result.ReadOnlyRootFilesystem = util.CoalesceBool(fromCRD.Kubernetes.ReadOnlyRootFilesystem, util.False()) result.SpiloRunAsUser = fromCRD.Kubernetes.SpiloRunAsUser result.SpiloRunAsGroup = fromCRD.Kubernetes.SpiloRunAsGroup result.SpiloFSGroup = fromCRD.Kubernetes.SpiloFSGroup @@ -121,6 +122,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *cpov1.OperatorConfigura result.PodManagementPolicy = util.Coalesce(fromCRD.Kubernetes.PodManagementPolicy, "ordered_ready") result.PersistentVolumeClaimRetentionPolicy = fromCRD.Kubernetes.PersistentVolumeClaimRetentionPolicy result.EnableReadinessProbe = fromCRD.Kubernetes.EnableReadinessProbe + result.EnableLivenessProbe = fromCRD.Kubernetes.EnableLivenessProbe result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m") result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") diff --git a/pkg/controller/postgresql.go b/pkg/controller/postgresql.go index 9952e4a3..d0bb879c 100644 --- a/pkg/controller/postgresql.go +++ b/pkg/controller/postgresql.go @@ -588,7 +588,7 @@ func (c *Controller) createPodServiceAccount(namespace string) error { _, err := c.KubeClient.ServiceAccounts(namespace).Get(context.TODO(), podServiceAccountName, metav1.GetOptions{}) if k8sutil.ResourceNotFound(err) { - c.logger.Infof(fmt.Sprintf("creating pod service account %q in the %q namespace", podServiceAccountName, namespace)) + c.logger.Infof("creating pod service account %q in the %q namespace", podServiceAccountName, namespace) // get a separate copy of service account // to prevent a race condition when setting a namespace for many clusters diff --git a/pkg/controller/util.go b/pkg/controller/util.go index fa504303..7d08a5c3 100644 --- a/pkg/controller/util.go +++ b/pkg/controller/util.go @@ -249,7 +249,7 @@ func (c *Controller) getInfrastructureRoles( } if len(errors) > 0 { - return uniqRoles, fmt.Errorf(strings.Join(errors, `', '`)) + return uniqRoles, fmt.Errorf("%s", strings.Join(errors, `', '`)) } return uniqRoles, nil diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index a167fdb5..492db958 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { SpiloPrivileged bool `name:"spilo_privileged" default:"false"` SpiloAllowPrivilegeEscalation *bool `name:"spilo_allow_privilege_escalation" default:"true"` AdditionalPodCapabilities []string `name:"additional_pod_capabilities" default:""` + ReadOnlyRootFilesystem *bool `name:"container_readonly_root_filesystem" default:"false"` ClusterLabels map[string]string `name:"cluster_labels" default:"application:cpo"` InheritedLabels []string `name:"inherited_labels" default:""` InheritedAnnotations []string `name:"inherited_annotations" default:""` @@ -248,7 +249,8 @@ type Config struct { TeamAPIRoleConfiguration map[string]string `name:"team_api_role_configuration" default:"log_statement:all"` PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` PodManagementPolicy string 
`name:"pod_management_policy" default:"ordered_ready"` - EnableReadinessProbe bool `name:"enable_readiness_probe" default:"false"` + EnableReadinessProbe bool `name:"enable_readiness_probe" default:"true"` + EnableLivenessProbe bool `name:"enable_liveness_probe" default:"false"` ProtectedRoles []string `name:"protected_role_names" default:"admin,cron_admin"` PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""` SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"` @@ -260,7 +262,7 @@ type Config struct { MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"off"` MajorVersionUpgradeTeamAllowList []string `name:"major_version_upgrade_team_allow_list" default:""` MinimalMajorVersion string `name:"minimal_major_version" default:"13"` - TargetMajorVersion string `name:"target_major_version" default:"17"` + TargetMajorVersion string `name:"target_major_version" default:"18"` PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"` PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"` EnablePatroniFailsafeMode *bool `name:"enable_patroni_failsafe_mode" default:"false"`