Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions internal/controller/postgrescluster/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -1218,11 +1218,12 @@ func (r *Reconciler) reconcileInstance(
}
}

// For now, we are not using logrotate to rotate postgres or patroni logs
// but we are using it for pgbackrest logs in the postgres pod
// For now, we are not using logrotate to rotate postgres or patroni logs,
// but we are using it for pgbackrest logs in the postgres pod, so we will
// set includeLogrotate to true, but only if backups are enabled.
collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template,
[]corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword,
[]string{naming.PGBackRestPGDataLogPath}, true, true)
[]string{naming.PGBackRestPGDataLogPath}, backupsSpecFound, true)
}

// Add postgres-exporter to the instance Pod spec
Expand Down
6 changes: 6 additions & 0 deletions testing/kuttl/e2e/otel-logging-and-metrics/05--backup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/05--annotate-cluster.yaml
assert:
- files/05-backup-completed.yaml
6 changes: 0 additions & 6 deletions testing/kuttl/e2e/otel-logging-and-metrics/06--backup.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/07--add-instrumentation.yaml
assert:
- files/07-instrumentation-added.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/09--add-custom-queries.yaml
assert:
- files/09-custom-queries-added.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/11--add-logs-exporter.yaml
assert:
- files/11-logs-exporter-added.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/13--create-cluster.yaml
assert:
- files/13-cluster-created.yaml
55 changes: 55 additions & 0 deletions testing/kuttl/e2e/otel-logging-and-metrics/14-assert-instance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
# First, check that all containers in the instance pod are ready.
# Then, grab the collector metrics output and check that a postgres
# metric is present, as well as a patroni metric.
# Then, check the collector logs for patroni, and postgres logs.
# Finally, ensure the monitoring user exists and is configured.
- script: |
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }

pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
-l postgres-operator.crunchydata.com/cluster=otel-cluster-no-backups,postgres-operator.crunchydata.com/data=postgres)
[ "$pod" = "" ] && retry "Pod not found" && exit 1

condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
{ check_containers_ready "$condition_json"; } || {
retry "containers not ready"
exit 1
}

scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
curl --insecure --silent http://localhost:9187/metrics)
{ contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || {
retry "5 second metric not found"
exit 1
}
{ contains "${scrape_metrics}" 'patroni_postgres_running'; } || {
retry "patroni metric not found"
exit 1
}

logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
{ contains "${logs}" 'InstrumentationScope patroni'; } || {
retry "patroni logs not found"
exit 1
}
{ contains "${logs}" 'InstrumentationScope postgres'; } || {
retry "postgres logs not found"
exit 1
}

kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \
-- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL'
DO $$
DECLARE
result record;
BEGIN
SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring';
ASSERT FOUND, 'user not found';
END $$
SQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/15--add-backups.yaml
assert:
- files/15-backups-added.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: |-
kubectl patch postgrescluster otel-cluster-no-backups --type 'merge' -p '{"spec":{"backups": null}}'
namespaced: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true"
namespaced: true
assert:
- files/17-backups-removed.yaml
6 changes: 6 additions & 0 deletions testing/kuttl/e2e/otel-logging-and-metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry
4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors.
1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready.
2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn).
5. Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups.
1. Ensure that the cluster comes up and the database and collector containers are running and ready.
2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods.
3. Remove the backups spec from the new cluster.
4. Annotate the cluster to allow backups to be removed.
5. Ensure that the repo-host pod is destroyed, pgbackrest is removed from the instance pod, and the collector continues to run on the instance pod.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious -- if you skip steps 3 and 4, will 5 fail?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I follow... If you don't remove the backups spec and annotate the cluster, pgbackrest will just continue to run (in the instance pod and repo-host)...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right -- I'm wondering if step 5 tests what we want it to test.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, first we create a brand new cluster that does not have backups enabled. And we make sure that the collector runs in that scenario. Then we add backups and ensure that everything works properly with that transition. Then we remove backups and ensure that everything works properly with that transition... What scenario do you think we aren't testing? Or what is it about the test that is insufficient?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been a while since I did a KUTTL test, but I remember running into issues where I asserted that X existed, but it actually ignored what wasn't X. So here, we assert there's a few containers, but not the collector container -- but will it fail if that collector container does exist? Or will it just skip that?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, I think I understand now... In the assert step we tell it to check the instance pod for specific containers and it will fail if the list is not exactly correct (if we ask about 3 containers, but there are actually 5 containers that include the 3 we ask about, it will fail).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it checks if the list (of containers, etc.) is complete still? I think we (and others) were pushing to get it to check only for the presence of the items in the list.


### NOTES

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,3 @@ spec:
proxy:
pgBouncer: {}
instrumentation: {}
---
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PGAdmin
metadata:
name: otel-pgadmin
spec:
users:
- username: otel@example.com
role: Administrator
passwordRef:
name: pgadmin-password-secret
key: otel-password
dataVolumeClaimSpec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
storage: 1Gi
serverGroups:
- name: supply
# An empty selector selects all postgresclusters in the Namespace
postgresClusterSelector: {}
config:
settings:
AUTHENTICATION_SOURCES: ['internal']
instrumentation: {}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ metadata:
labels:
postgres-operator.crunchydata.com/data: pgbackrest
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
Expand Down Expand Up @@ -98,12 +99,8 @@ metadata:
postgres-operator.crunchydata.com/data: pgadmin
postgres-operator.crunchydata.com/role: pgadmin
postgres-operator.crunchydata.com/pgadmin: otel-pgadmin
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
ready: true
started: true
- name: pgadmin
ready: true
started: true
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PGAdmin
metadata:
name: otel-pgadmin
spec:
users:
- username: otel@example.com
role: Administrator
passwordRef:
name: pgadmin-password-secret
key: otel-password
dataVolumeClaimSpec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
storage: 1Gi
serverGroups:
- name: supply
# An empty selector selects all postgresclusters in the Namespace
postgresClusterSelector: {}
config:
settings:
AUTHENTICATION_SOURCES: ['internal']
instrumentation: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
name: otel-cluster
status:
instances:
- name: instance1
readyReplicas: 1
replicas: 1
updatedReplicas: 1
proxy:
pgBouncer:
readyReplicas: 1
replicas: 1
---
apiVersion: v1
kind: Pod
metadata:
labels:
postgres-operator.crunchydata.com/data: postgres
postgres-operator.crunchydata.com/role: master
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
ready: true
started: true
- name: database
ready: true
started: true
- name: pgbackrest
ready: true
started: true
- name: pgbackrest-config
ready: true
started: true
- name: replication-cert-copy
ready: true
started: true
phase: Running
---
apiVersion: v1
kind: Pod
metadata:
labels:
postgres-operator.crunchydata.com/data: pgbackrest
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
ready: true
started: true
- name: pgbackrest
ready: true
started: true
- name: pgbackrest-config
ready: true
started: true
phase: Running
---
apiVersion: v1
kind: Pod
metadata:
labels:
postgres-operator.crunchydata.com/role: pgbouncer
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
ready: true
started: true
- name: pgbouncer
ready: true
started: true
- name: pgbouncer-config
ready: true
started: true
phase: Running
---
apiVersion: v1
kind: Service
metadata:
name: otel-cluster-primary
---
apiVersion: v1
kind: ConfigMap
metadata:
labels:
postgres-operator.crunchydata.com/role: pgadmin
postgres-operator.crunchydata.com/pgadmin: otel-pgadmin
---
apiVersion: v1
kind: Pod
metadata:
labels:
postgres-operator.crunchydata.com/data: pgadmin
postgres-operator.crunchydata.com/role: pgadmin
postgres-operator.crunchydata.com/pgadmin: otel-pgadmin
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
ready: true
started: true
- name: pgadmin
ready: true
started: true
phase: Running
---
apiVersion: v1
kind: Secret
metadata:
labels:
postgres-operator.crunchydata.com/role: pgadmin
postgres-operator.crunchydata.com/pgadmin: otel-pgadmin
type: Opaque
---
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ metadata:
labels:
postgres-operator.crunchydata.com/data: pgbackrest
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ metadata:
labels:
postgres-operator.crunchydata.com/data: pgbackrest
postgres-operator.crunchydata.com/cluster: otel-cluster
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
status:
containerStatuses:
- name: collector
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
name: otel-cluster-no-backups
spec:
postgresVersion: ${KUTTL_PG_VERSION}
instances:
- name: instance1
dataVolumeClaimSpec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
storage: 1Gi
instrumentation: {}
Loading
Loading