diff --git a/docs/severity.md b/docs/severity.md index 30be3a0cd..4f3433460 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -1311,9 +1311,11 @@ |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| +|Zookeeper server-health|-|X|-|-|-| +|Zookeeper cluster-health|X|-|-|-|-| +|Zookeeper server-latency|-|X|-|-|-| +|Zookeeper cluster-latency|X|-|-|-|-| |Zookeeper heartbeat|X|-|-|-|-| -|Zookeeper service health|X|-|-|-|-| -|Zookeeper latency|X|X|-|-|-| |Zookeeper file descriptors usage|X|X|-|-|-| diff --git a/modules/smart-agent_zookeeper/README.md b/modules/smart-agent_zookeeper/README.md index b4e0cbe78..3157a15cf 100644 --- a/modules/smart-agent_zookeeper/README.md +++ b/modules/smart-agent_zookeeper/README.md @@ -59,7 +59,7 @@ Note the following parameters: These 3 parameters along with all variables defined in [common-variables.tf](common-variables.tf) are common to all [modules](../) in this repository. Other variables, specific to this module, are available in -[variables.tf](variables.tf). +[variables.tf](variables.tf) and [variables-gen.tf](variables-gen.tf). In general, the default configuration "works" but all of these Terraform [variables](https://www.terraform.io/language/values/variables) make it possible to customize the detectors behavior to better fit your needs. @@ -77,9 +77,11 @@ This module creates the following SignalFx detectors which could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| +|Zookeeper server-health|-|X|-|-|-| +|Zookeeper cluster-health|X|-|-|-|-| +|Zookeeper server-latency|-|X|-|-|-| +|Zookeeper cluster-latency|X|-|-|-|-| |Zookeeper heartbeat|X|-|-|-|-| -|Zookeeper service health|X|-|-|-|-| -|Zookeeper latency|X|X|-|-|-| |Zookeeper file descriptors usage|X|X|-|-|-| ## How to collect required metrics? diff --git a/modules/smart-agent_zookeeper/conf/00-zookeeper-server-health.yaml b/modules/smart-agent_zookeeper/conf/00-zookeeper-server-health.yaml new file mode 100644 index 000000000..d2daca7f5 --- /dev/null +++ b/modules/smart-agent_zookeeper/conf/00-zookeeper-server-health.yaml @@ -0,0 +1,12 @@ +module: zookeeper +name: server-health +aggregation: false +signals: + signal: + metric: "gauge.zk_service_health" +rules: + major: + threshold: 1 + comparator: "!=" + description: "Zookeeper server is not running" + lasting_duration: "5m" \ No newline at end of file diff --git a/modules/smart-agent_zookeeper/conf/01-zookeeper-cluster-health.yaml b/modules/smart-agent_zookeeper/conf/01-zookeeper-cluster-health.yaml new file mode 100644 index 000000000..9eb576b9e --- /dev/null +++ b/modules/smart-agent_zookeeper/conf/01-zookeeper-cluster-health.yaml @@ -0,0 +1,12 @@ +module: zookeeper +name: cluster-health +aggregation: ".mean(by=['kubernetes_cluster'])" +signals: + signal: + metric: "gauge.zk_service_health" +rules: + critical: + threshold: 0 + comparator: "==" + description: "Zookeeper cluster is not running" + lasting_duration: "5m" \ No newline at end of file diff --git a/modules/smart-agent_zookeeper/conf/02-zookeeper-server-latency.yaml b/modules/smart-agent_zookeeper/conf/02-zookeeper-server-latency.yaml new file mode 100644 index 000000000..b729bdc6c --- /dev/null +++ b/modules/smart-agent_zookeeper/conf/02-zookeeper-server-latency.yaml @@ -0,0 +1,12 @@ +module: zookeeper +name: server-latency +aggregation: false +signals: + signal: + metric: "gauge.zk_avg_latency" +rules: + major: + threshold: 250000 + comparator: ">" + description: "Zookeeper server latency is too high" + lasting_duration: "5m" \ No newline at end of file diff --git a/modules/smart-agent_zookeeper/conf/03-zookeeper-cluster-latency.yaml b/modules/smart-agent_zookeeper/conf/03-zookeeper-cluster-latency.yaml new file mode 100644 index 000000000..e379e62aa --- /dev/null +++ b/modules/smart-agent_zookeeper/conf/03-zookeeper-cluster-latency.yaml @@ -0,0 +1,12 @@ +module: zookeeper +name: cluster-latency +aggregation: ".mean(by=['kubernetes_cluster'])" +signals: + signal: + metric: "gauge.zk_avg_latency" +rules: + critical: + threshold: 300000 + comparator: ">" + description: "Zookeeper cluster latency is too high" + lasting_duration: "5m" \ No newline at end of file diff --git a/modules/smart-agent_zookeeper/detectors-gen.tf b/modules/smart-agent_zookeeper/detectors-gen.tf new file mode 100644 index 000000000..634ac7ad1 --- /dev/null +++ b/modules/smart-agent_zookeeper/detectors-gen.tf @@ -0,0 +1,108 @@ +resource "signalfx_detector" "server-health" { + name = format("%s %s", local.detector_name_prefix, "Zookeeper server-health") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('gauge.zk_service_health', filter=${module.filtering.signalflow})${var.server-health_transformation_function}.publish('signal') + detect(when(signal != ${var.server-health_threshold_major}, lasting=%{if var.server-health_lasting_duration_major == null}None%{else}'${var.server-health_lasting_duration_major}'%{endif}, at_least=${var.server-health_at_least_percentage_major})).publish('MAJOR') +EOF + + rule { + description = "Zookeeper server is not running != ${var.server-health_threshold_major}" + severity = "Major" + detect_label = "MAJOR" + disabled = coalesce(var.server-health_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.server-health_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.server-health_runbook_url, var.runbook_url), "") + tip = var.server-health_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.server-health_max_delay +} + +resource "signalfx_detector" "cluster-health" { + name = format("%s %s", local.detector_name_prefix, "Zookeeper cluster-health") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('gauge.zk_service_health', filter=${module.filtering.signalflow})${var.cluster-health_aggregation_function}${var.cluster-health_transformation_function}.publish('signal') + detect(when(signal == ${var.cluster-health_threshold_critical}, lasting=%{if var.cluster-health_lasting_duration_critical == null}None%{else}'${var.cluster-health_lasting_duration_critical}'%{endif}, at_least=${var.cluster-health_at_least_percentage_critical})).publish('CRIT') +EOF + + rule { + description = "Zookeeper cluster is not running == ${var.cluster-health_threshold_critical}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.cluster-health_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.cluster-health_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.cluster-health_runbook_url, var.runbook_url), "") + tip = var.cluster-health_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.cluster-health_max_delay +} + +resource "signalfx_detector" "server-latency" { + name = format("%s %s", local.detector_name_prefix, "Zookeeper server-latency") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('gauge.zk_avg_latency', filter=${module.filtering.signalflow})${var.server-latency_transformation_function}.publish('signal') + detect(when(signal > ${var.server-latency_threshold_major}, lasting=%{if var.server-latency_lasting_duration_major == null}None%{else}'${var.server-latency_lasting_duration_major}'%{endif}, at_least=${var.server-latency_at_least_percentage_major})).publish('MAJOR') +EOF + + rule { + description = "Zookeeper server latency is too high > ${var.server-latency_threshold_major}" + severity = "Major" + detect_label = "MAJOR" + disabled = coalesce(var.server-latency_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.server-latency_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.server-latency_runbook_url, var.runbook_url), "") + tip = var.server-latency_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.server-latency_max_delay +} + +resource "signalfx_detector" "cluster-latency" { + name = format("%s %s", local.detector_name_prefix, "Zookeeper cluster-latency") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('gauge.zk_avg_latency', filter=${module.filtering.signalflow})${var.cluster-latency_aggregation_function}${var.cluster-latency_transformation_function}.publish('signal') + detect(when(signal > ${var.cluster-latency_threshold_critical}, lasting=%{if var.cluster-latency_lasting_duration_critical == null}None%{else}'${var.cluster-latency_lasting_duration_critical}'%{endif}, at_least=${var.cluster-latency_at_least_percentage_critical})).publish('CRIT') +EOF + + rule { + description = "Zookeeper cluster latency is too high > ${var.cluster-latency_threshold_critical}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.cluster-latency_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.cluster-latency_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.cluster-latency_runbook_url, var.runbook_url), "") + tip = var.cluster-latency_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.cluster-latency_max_delay +} + diff --git a/modules/smart-agent_zookeeper/detectors-zookeeper.tf b/modules/smart-agent_zookeeper/detectors-zookeeper.tf index 125012060..ceb386b3c 100644 --- a/modules/smart-agent_zookeeper/detectors-zookeeper.tf +++ b/modules/smart-agent_zookeeper/detectors-zookeeper.tf @@ -26,73 +26,6 @@ EOF max_delay = var.heartbeat_max_delay } -resource "signalfx_detector" "zookeeper_health" { - name = format("%s %s", local.detector_name_prefix, "Zookeeper service health") - - authorized_writer_teams = var.authorized_writer_teams - teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) - tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) - - program_text = <<-EOF - signal = data('gauge.zk_service_health', filter=filter('plugin', 'zookeeper') and ${module.filtering.signalflow})${var.zookeeper_health_aggregation_function}${var.zookeeper_health_transformation_function}.publish('signal') - detect(when(signal != 1)).publish('CRIT') -EOF - - rule { - description = "is not running" - severity = "Critical" - detect_label = "CRIT" - disabled = coalesce(var.zookeeper_health_disabled_critical, var.zookeeper_health_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.zookeeper_health_notifications, "critical", []), var.notifications.critical), null) - runbook_url = try(coalesce(var.zookeeper_health_runbook_url, var.runbook_url), "") - tip = var.zookeeper_health_tip - parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject - parameterized_body = var.message_body == "" ? local.rule_body : var.message_body - } - - max_delay = var.zookeeper_health_max_delay -} - -resource "signalfx_detector" "zookeeper_latency" { - name = format("%s %s", local.detector_name_prefix, "Zookeeper latency") - - authorized_writer_teams = var.authorized_writer_teams - teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) - tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) - - program_text = <<-EOF - signal = data('gauge.zk_avg_latency', filter=filter('plugin', 'zookeeper') and ${module.filtering.signalflow})${var.zookeeper_latency_aggregation_function}${var.zookeeper_latency_transformation_function}.publish('signal') - detect(when(signal > ${var.zookeeper_latency_threshold_critical})).publish('CRIT') - detect(when(signal > ${var.zookeeper_latency_threshold_major}) and (not when(signal > ${var.zookeeper_latency_threshold_critical}))).publish('MAJOR') -EOF - - rule { - description = "is too high > ${var.zookeeper_latency_threshold_critical}" - severity = "Critical" - detect_label = "CRIT" - disabled = coalesce(var.zookeeper_latency_disabled_critical, var.zookeeper_latency_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.zookeeper_latency_notifications, "critical", []), var.notifications.critical), null) - runbook_url = try(coalesce(var.zookeeper_latency_runbook_url, var.runbook_url), "") - tip = var.zookeeper_latency_tip - parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject - parameterized_body = var.message_body == "" ? local.rule_body : var.message_body - } - - rule { - description = "is too high > ${var.zookeeper_latency_threshold_major}" - severity = "Major" - detect_label = "MAJOR" - disabled = coalesce(var.zookeeper_latency_disabled_major, var.zookeeper_latency_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.zookeeper_latency_notifications, "major", []), var.notifications.major), null) - runbook_url = try(coalesce(var.zookeeper_latency_runbook_url, var.runbook_url), "") - tip = var.zookeeper_latency_tip - parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject - parameterized_body = var.message_body == "" ? local.rule_body : var.message_body - } - - max_delay = var.zookeeper_latency_max_delay -} - resource "signalfx_detector" "file_descriptors" { name = format("%s %s", local.detector_name_prefix, "Zookeeper file descriptors usage") diff --git a/modules/smart-agent_zookeeper/outputs.tf b/modules/smart-agent_zookeeper/outputs.tf index e6913132e..a4b5fadd7 100644 --- a/modules/smart-agent_zookeeper/outputs.tf +++ b/modules/smart-agent_zookeeper/outputs.tf @@ -1,3 +1,13 @@ +output "cluster-health" { + description = "Detector resource for cluster-health" + value = signalfx_detector.cluster-health +} + +output "cluster-latency" { + description = "Detector resource for cluster-latency" + value = signalfx_detector.cluster-latency +} + output "file_descriptors" { description = "Detector resource for file_descriptors" value = signalfx_detector.file_descriptors @@ -8,13 +18,13 @@ output "heartbeat" { value = signalfx_detector.heartbeat } -output "zookeeper_health" { - description = "Detector resource for zookeeper_health" - value = signalfx_detector.zookeeper_health +output "server-health" { + description = "Detector resource for server-health" + value = signalfx_detector.server-health } -output "zookeeper_latency" { - description = "Detector resource for zookeeper_latency" - value = signalfx_detector.zookeeper_latency +output "server-latency" { + description = "Detector resource for server-latency" + value = signalfx_detector.server-latency } diff --git a/modules/smart-agent_zookeeper/variables-gen.tf b/modules/smart-agent_zookeeper/variables-gen.tf new file mode 100644 index 000000000..a629d304d --- /dev/null +++ b/modules/smart-agent_zookeeper/variables-gen.tf @@ -0,0 +1,232 @@ +# server-health detector + +variable "server-health_notifications" { + description = "Notification recipients list per severity overridden for server-health detector" + type = map(list(string)) + default = {} +} + +variable "server-health_transformation_function" { + description = "Transformation function for server-health detector (i.e. \".mean(over='5m')\")" + type = string + default = "" +} + +variable "server-health_max_delay" { + description = "Enforce max delay for server-health detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "server-health_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "server-health_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "server-health_disabled" { + description = "Disable all alerting rules for server-health detector" + type = bool + default = null +} + +variable "server-health_threshold_major" { + description = "Major threshold for server-health detector" + type = number + default = 1 +} + +variable "server-health_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = "5m" +} + +variable "server-health_at_least_percentage_major" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +# cluster-health detector + +variable "cluster-health_notifications" { + description = "Notification recipients list per severity overridden for cluster-health detector" + type = map(list(string)) + default = {} +} + +variable "cluster-health_aggregation_function" { + description = "Aggregation function and group by for cluster-health detector (i.e. \".mean(by=['host'])\")" + type = string + default = ".mean(by=['kubernetes_cluster'])" +} + +variable "cluster-health_transformation_function" { + description = "Transformation function for cluster-health detector (i.e. \".mean(over='5m')\")" + type = string + default = "" +} + +variable "cluster-health_max_delay" { + description = "Enforce max delay for cluster-health detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "cluster-health_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "cluster-health_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "cluster-health_disabled" { + description = "Disable all alerting rules for cluster-health detector" + type = bool + default = null +} + +variable "cluster-health_threshold_critical" { + description = "Critical threshold for cluster-health detector" + type = number + default = 0 +} + +variable "cluster-health_lasting_duration_critical" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = "5m" +} + +variable "cluster-health_at_least_percentage_critical" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +# server-latency detector + +variable "server-latency_notifications" { + description = "Notification recipients list per severity overridden for server-latency detector" + type = map(list(string)) + default = {} +} + +variable "server-latency_transformation_function" { + description = "Transformation function for server-latency detector (i.e. \".mean(over='5m')\")" + type = string + default = "" +} + +variable "server-latency_max_delay" { + description = "Enforce max delay for server-latency detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "server-latency_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "server-latency_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "server-latency_disabled" { + description = "Disable all alerting rules for server-latency detector" + type = bool + default = null +} + +variable "server-latency_threshold_major" { + description = "Major threshold for server-latency detector" + type = number + default = 250000 +} + +variable "server-latency_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = "5m" +} + +variable "server-latency_at_least_percentage_major" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +# cluster-latency detector + +variable "cluster-latency_notifications" { + description = "Notification recipients list per severity overridden for cluster-latency detector" + type = map(list(string)) + default = {} +} + +variable "cluster-latency_aggregation_function" { + description = "Aggregation function and group by for cluster-latency detector (i.e. \".mean(by=['host'])\")" + type = string + default = ".mean(by=['kubernetes_cluster'])" +} + +variable "cluster-latency_transformation_function" { + description = "Transformation function for cluster-latency detector (i.e. \".mean(over='5m')\")" + type = string + default = "" +} + +variable "cluster-latency_max_delay" { + description = "Enforce max delay for cluster-latency detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "cluster-latency_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "cluster-latency_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "cluster-latency_disabled" { + description = "Disable all alerting rules for cluster-latency detector" + type = bool + default = null +} + +variable "cluster-latency_threshold_critical" { + description = "Critical threshold for cluster-latency detector" + type = number + default = 300000 +} + +variable "cluster-latency_lasting_duration_critical" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = "5m" +} + +variable "cluster-latency_at_least_percentage_critical" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} diff --git a/modules/smart-agent_zookeeper/variables.tf b/modules/smart-agent_zookeeper/variables.tf index 9b8032632..b2ee2670e 100644 --- a/modules/smart-agent_zookeeper/variables.tf +++ b/modules/smart-agent_zookeeper/variables.tf @@ -44,124 +44,6 @@ variable "heartbeat_aggregation_function" { default = "" } -# zookeeper_health detector - -variable "zookeeper_health_max_delay" { - description = "Enforce max delay for zookeeper_health detector (use \"0\" or \"null\" for \"Auto\")" - type = number - default = null -} - -variable "zookeeper_health_tip" { - description = "Suggested first course of action or any note useful for incident handling" - type = string - default = "" -} - -variable "zookeeper_health_runbook_url" { - description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" - type = string - default = "" -} - -variable "zookeeper_health_disabled" { - description = "Disable all alerting rules for zookeeper_health detector" - type = bool - default = null -} - -variable "zookeeper_health_disabled_critical" { - description = "Disable critical alerting rule for zookeeper_health detector" - type = bool - default = null -} - -variable "zookeeper_health_notifications" { - description = "Notification recipients list per severity overridden for zookeeper_health detector" - type = map(list(string)) - default = {} -} - -variable "zookeeper_health_aggregation_function" { - description = "Aggregation function and group by for zookeeper_health detector (i.e. \".mean(by=['host'])\")" - type = string - default = "" -} - -variable "zookeeper_health_transformation_function" { - description = "Transformation function for zookeeper_health detector (i.e. \".mean(over='5m')\")" - type = string - default = ".mean(over='5m')" -} - -# zookeeper_latency detector - -variable "zookeeper_latency_max_delay" { - description = "Enforce max delay for zookeeper_latency detector (use \"0\" or \"null\" for \"Auto\")" - type = number - default = null -} - -variable "zookeeper_latency_tip" { - description = "Suggested first course of action or any note useful for incident handling" - type = string - default = "" -} - -variable "zookeeper_latency_runbook_url" { - description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" - type = string - default = "" -} - -variable "zookeeper_latency_disabled" { - description = "Disable all alerting rules for zookeeper_latency detector" - type = bool - default = null -} - -variable "zookeeper_latency_disabled_critical" { - description = "Disable critical alerting rule for zookeeper_latency detector" - type = bool - default = null -} - -variable "zookeeper_latency_disabled_major" { - description = "Disable major alerting rule for zookeeper_latency detector" - type = bool - default = null -} - -variable "zookeeper_latency_notifications" { - description = "Notification recipients list per severity overridden for zookeeper_latency detector" - type = map(list(string)) - default = {} -} - -variable "zookeeper_latency_aggregation_function" { - description = "Aggregation function and group by for zookeeper_latency detector (i.e. \".mean(by=['host'])\")" - type = string - default = "" -} - -variable "zookeeper_latency_transformation_function" { - description = "Transformation function for zookeeper_latency detector (i.e. \".mean(over='5m')\")" - type = string - default = ".mean(over='5m')" -} - -variable "zookeeper_latency_threshold_critical" { - description = "Critical threshold for zookeeper_latency detector" - type = number - default = 300000 -} - -variable "zookeeper_latency_threshold_major" { - description = "Major threshold for zookeeper_latency detector" - type = number - default = 250000 -} - # file_descriptors detector variable "file_descriptors_max_delay" {