From d253f53c53b5ad2faa5b4937c2d976411a8af7b6 Mon Sep 17 00:00:00 2001 From: tony-landreth Date: Wed, 7 May 2025 11:29:02 -0400 Subject: [PATCH] Adds ccp_settings_gauge to OTel metrics This commit adds the folowing metrics: - ccp_settings_gauge_checkpoint_timeout - ccp_settings_gauge_checkpoint_completion_target - ccp_settings_gauge_shared_buffers These metrics have been supported for PGO on postgres_exporter. They are now supported for PGO on OpenTelemetry. Issue: PGO-2473 --- .../generated/postgres_5s_metrics.json | 2 +- internal/collector/postgres_5s_metrics.yaml | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/internal/collector/generated/postgres_5s_metrics.json b/internal/collector/generated/postgres_5s_metrics.json index 484c99dfa0..20e697c470 100644 --- a/internal/collector/generated/postgres_5s_metrics.json +++ b/internal/collector/generated/postgres_5s_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] +[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"checkpoint timeout in seconds","metric_name":"ccp_settings_gauge_checkpoint_timeout","static_attributes":{"server":"localhost:5432"},"value_column":"checkpoint_timeout"},{"description":"checkpoint completion target, ranging from 0 to 1","metric_name":"ccp_settings_gauge_checkpoint_completion_target","static_attributes":{"server":"localhost:5432"},"value_column":"checkpoint_completion_target","value_type":"double"},{"description":"size of shared_buffers in bytes","metric_name":"ccp_settings_gauge_shared_buffers","static_attributes":{"server":"localhost:5432"},"value_column":"shared_buffers"}],"sql":"SELECT\n (select setting::int from pg_catalog.pg_settings where name = 'checkpoint_timeout') as checkpoint_timeout,\n (select setting::float from pg_catalog.pg_settings where name = 'checkpoint_completion_target') as checkpoint_completion_target,\n (select 8192*setting::bigint as bytes from pg_catalog.pg_settings where name = 'shared_buffers') as shared_buffers\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] diff --git a/internal/collector/postgres_5s_metrics.yaml b/internal/collector/postgres_5s_metrics.yaml index 82ab10ef3c..8ee37b0dda 100644 --- a/internal/collector/postgres_5s_metrics.yaml +++ b/internal/collector/postgres_5s_metrics.yaml @@ -41,6 +41,29 @@ static_attributes: server: "localhost:5432" + - sql: > + SELECT + (select setting::int from pg_catalog.pg_settings where name = 'checkpoint_timeout') as checkpoint_timeout, + (select setting::float from pg_catalog.pg_settings where name = 'checkpoint_completion_target') as checkpoint_completion_target, + (select 8192*setting::bigint as bytes from pg_catalog.pg_settings where name = 'shared_buffers') as shared_buffers + metrics: + - metric_name: ccp_settings_gauge_checkpoint_timeout + value_column: checkpoint_timeout + description: checkpoint timeout in seconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_settings_gauge_checkpoint_completion_target + value_column: checkpoint_completion_target + value_type: double + description: checkpoint completion target, ranging from 0 to 1 + static_attributes: + server: "localhost:5432" + - metric_name: ccp_settings_gauge_shared_buffers + value_column: shared_buffers + description: size of shared_buffers in bytes + static_attributes: + server: "localhost:5432" + - sql: > SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive