From 219c2c14b94dc130e78ced86da10a36a87d0b986 Mon Sep 17 00:00:00 2001 From: Kevin Date: Thu, 26 Sep 2024 13:38:15 -0400 Subject: [PATCH 01/10] docs updates --- README.md | 26 ++++++++++++++ aws/alb/README.md | 2 +- aws/apigateway/README.md | 2 +- aws/beanstalk/README.md | 2 +- aws/ec2/README.md | 18 +++------- aws/ecs-cluster/README.md | 2 +- aws/ecs-fargate/README.md | 2 +- aws/ecs-service/README.md | 2 +- aws/elasticache/README.md | 2 +- aws/elasticsearch/README.md | 4 +-- aws/elb/README.md | 19 ++++++----- aws/lambda/README.md | 67 +++++++++++++++++++++++-------------- aws/rds/README.md | 61 +++++++++++++++++++-------------- aws/sqs/README.md | 39 ++++++++------------- aws/vpn/README.md | 31 ++++------------- 15 files changed, 148 insertions(+), 131 deletions(-) diff --git a/README.md b/README.md index 62593d8..3c31d8a 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,29 @@ module "monitor" { ``` ## About + + +## Requirements + +No requirements. + +## Providers + +No providers. + +## Modules + +No modules. + +## Resources + +No resources. + +## Inputs + +No inputs. + +## Outputs + +No outputs. 
+ \ No newline at end of file diff --git a/aws/alb/README.md b/aws/alb/README.md index ec3899f..29522da 100644 --- a/aws/alb/README.md +++ b/aws/alb/README.md @@ -20,7 +20,7 @@ Configures the following for ALBs based on tags matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/apigateway/README.md b/aws/apigateway/README.md index aaaf2f8..2596f1f 100644 --- a/aws/apigateway/README.md +++ b/aws/apigateway/README.md @@ -18,7 +18,7 @@ Configures the following for APIs based on tags matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/beanstalk/README.md b/aws/beanstalk/README.md index 007fd00..b2527a1 100644 --- a/aws/beanstalk/README.md +++ b/aws/beanstalk/README.md @@ -20,7 +20,7 @@ Configures the following for Beanstalk environments based on tags matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/ec2/README.md b/aws/ec2/README.md index 9feda50..0051be3 100644 --- a/aws/ec2/README.md +++ b/aws/ec2/README.md @@ -17,7 +17,7 @@ All checks are enabled by default. | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -57,26 +57,18 @@ No modules. 
| [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | -| [status\_failed\_check\_enabled](#input\_status\_failed\_check\_enabled) | Enable ec2 instance status check monitor | `bool` | `false` | no | +| [status\_failed\_check\_enabled](#input\_status\_failed\_check\_enabled) | Enable ec2 instance status check monitor | `bool` | `true` | no | | [status\_failed\_check\_evaluation\_window](#input\_status\_failed\_check\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_check\_no\_data\_window](#input\_status\_failed\_check\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_check\_threshold\_critical](#input\_status\_failed\_check\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_check\_threshold\_warning](#input\_status\_failed\_check\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_instance\_enabled](#input\_status\_failed\_instance\_enabled) | Enable instance status check monitor | `bool` | `false` | no | +| [status\_failed\_instance\_enabled](#input\_status\_failed\_instance\_enabled) | Enable instance status check monitor | `bool` | `true` | no | | [status\_failed\_instance\_evaluation\_window](#input\_status\_failed\_instance\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | 
[status\_failed\_instance\_no\_data\_window](#input\_status\_failed\_instance\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_instance\_threshold\_critical](#input\_status\_failed\_instance\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_instance\_threshold\_warning](#input\_status\_failed\_instance\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_system\_enabled](#input\_status\_failed\_system\_enabled) | Enable instance system failure monitor | `bool` | `false` | no | +| [status\_failed\_system\_enabled](#input\_status\_failed\_system\_enabled) | Enable instance system failure monitor | `bool` | `true` | no | | [status\_failed\_system\_evaluation\_window](#input\_status\_failed\_system\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_system\_no\_data\_window](#input\_status\_failed\_system\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_system\_threshold\_critical](#input\_status\_failed\_system\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_system\_threshold\_warning](#input\_status\_failed\_system\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [status\_failed\_volume\_enabled](#input\_status\_failed\_volume\_enabled) | Enable attached volume status monitor | `bool` | `false` | no | +| [status\_failed\_volume\_enabled](#input\_status\_failed\_volume\_enabled) | Enable attached volume status monitor | `bool` | `true` | no | | [status\_failed\_volume\_evaluation\_window](#input\_status\_failed\_volume\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 
2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_volume\_no\_data\_window](#input\_status\_failed\_volume\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [status\_failed\_volume\_threshold\_critical](#input\_status\_failed\_volume\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [status\_failed\_volume\_threshold\_warning](#input\_status\_failed\_volume\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | diff --git a/aws/ecs-cluster/README.md b/aws/ecs-cluster/README.md index 479ac5c..a118a66 100644 --- a/aws/ecs-cluster/README.md +++ b/aws/ecs-cluster/README.md @@ -19,7 +19,7 @@ Configures the following for ECS clusters based on tags matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/ecs-fargate/README.md b/aws/ecs-fargate/README.md index fc4875e..31f78c5 100644 --- a/aws/ecs-fargate/README.md +++ b/aws/ecs-fargate/README.md @@ -19,7 +19,7 @@ Configures the following for ECS Fargate tasks based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/ecs-service/README.md b/aws/ecs-service/README.md index f11e074..2c561e9 100644 --- a/aws/ecs-service/README.md +++ b/aws/ecs-service/README.md @@ -19,7 +19,7 @@ Configures the following for ECS services based on tag matches: | Name | Version | |------|---------| -| 
[datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/elasticache/README.md b/aws/elasticache/README.md index 55933f8..7bc29d2 100644 --- a/aws/elasticache/README.md +++ b/aws/elasticache/README.md @@ -24,7 +24,7 @@ Configures the following for ElastiCache clusters based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules diff --git a/aws/elasticsearch/README.md b/aws/elasticsearch/README.md index fc6d9c4..af4b2f6 100644 --- a/aws/elasticsearch/README.md +++ b/aws/elasticsearch/README.md @@ -20,7 +20,7 @@ Configures the following for ElasticSearch domains based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -70,7 +70,7 @@ No modules. | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `false` | no | +| [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `true` | no | | [free\_storage\_evaluation\_window](#input\_free\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [free\_storage\_no\_data\_window](#input\_free\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | 
[free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold (GB) | `number` | `null` | no | diff --git a/aws/elb/README.md b/aws/elb/README.md index 9063d12..6a0756a 100644 --- a/aws/elb/README.md +++ b/aws/elb/README.md @@ -20,7 +20,7 @@ Configures the following for Classic ELBs based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -30,8 +30,8 @@ No modules. | Name | Type | |------|------| +| [datadog_monitor.http_5xx_backend_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | | [datadog_monitor.http_5xx_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.http_5xx_tg_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | | [datadog_monitor.latency](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | | [datadog_monitor.no_healthy_instances](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | @@ -43,21 +43,21 @@ No modules. | [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | -| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:lb"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | +| [http\_5xx\_backend\_responses\_enabled](#input\_http\_5xx\_backend\_responses\_enabled) | Enable HTTP 5xx response monitor (backend) | `bool` | `false` | no | +| [http\_5xx\_backend\_responses\_evaluation\_window](#input\_http\_5xx\_backend\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [http\_5xx\_backend\_responses\_no\_data\_window](#input\_http\_5xx\_backend\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [http\_5xx\_backend\_responses\_threshold\_critical](#input\_http\_5xx\_backend\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | +| [http\_5xx\_backend\_responses\_threshold\_warning](#input\_http\_5xx\_backend\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No 
data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | @@ -69,7 +69,8 @@ No modules. 
| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | | [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | +| [no\_healthy\_instances\_threshold\_critical](#input\_no\_healthy\_instances\_threshold\_critical) | Critical threshold (percentage) | `number` | `0` | no | +| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | diff --git a/aws/lambda/README.md b/aws/lambda/README.md index 2393fd1..f07e953 100644 --- a/aws/lambda/README.md +++ b/aws/lambda/README.md @@ -23,7 +23,7 @@ Configures the following for Lambda functions based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -33,10 +33,13 @@ No modules. 
| Name | Type | |------|------| -| [datadog_monitor.http_5xx_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.http_5xx_tg_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.latency](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.no_healthy_instances](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.cold_starts](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.error_rate](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.iterator_age](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.iterator_age_forecast](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.out_of_memory](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.throttle_rate](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.timeouts](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | ## Inputs @@ -46,44 +49,58 @@ No modules. 
| [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | -| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:lambda"
]
| no | +| [cold\_starts\_enabled](#input\_cold\_starts\_enabled) | Enable cold starts monitor (requires enhanced metrics) | `bool` | `false` | no | +| [cold\_starts\_evaluation\_window](#input\_cold\_starts\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_4h"` | no | +| [cold\_starts\_no\_data\_window](#input\_cold\_starts\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [cold\_starts\_threshold\_critical](#input\_cold\_starts\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [cold\_starts\_threshold\_warning](#input\_cold\_starts\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [error\_rate\_enabled](#input\_error\_rate\_enabled) | Enable Lambda error rate monitor | `bool` | `false` | no | +| [error\_rate\_evaluation\_window](#input\_error\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [error\_rate\_no\_data\_window](#input\_error\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [error\_rate\_threshold\_critical](#input\_error\_rate\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | +| [error\_rate\_threshold\_warning](#input\_error\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see 
[https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | 
-| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [iterator\_age\_enabled](#input\_iterator\_age\_enabled) | Enable iterator age monitor | `bool` | `false` | no | +| [iterator\_age\_evaluation\_window](#input\_iterator\_age\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [iterator\_age\_forecast\_enabled](#input\_iterator\_age\_forecast\_enabled) | Enable iterator age forecast monitor | `bool` | `false` | no | +| [iterator\_age\_forecast\_evaluation\_window](#input\_iterator\_age\_forecast\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1d"` | no | +| [iterator\_age\_forecast\_no\_data\_window](#input\_iterator\_age\_forecast\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [iterator\_age\_no\_data\_window](#input\_iterator\_age\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [iterator\_age\_threshold\_critical](#input\_iterator\_age\_threshold\_critical) | Critical threshold (milliseconds) | `number` | `86400000` | no | +| [iterator\_age\_threshold\_warning](#input\_iterator\_age\_threshold\_warning) | Warning threshold (milliseconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags 
to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in 
warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [out\_of\_memory\_enabled](#input\_out\_of\_memory\_enabled) | Enable out of memory monitor (requires enhanced metrics) | `bool` | `false` | no | +| [out\_of\_memory\_evaluation\_window](#input\_out\_of\_memory\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_4h"` | no | +| [out\_of\_memory\_no\_data\_window](#input\_out\_of\_memory\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [out\_of\_memory\_threshold\_critical](#input\_out\_of\_memory\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [out\_of\_memory\_threshold\_warning](#input\_out\_of\_memory\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | +| [throttle\_rate\_enabled](#input\_throttle\_rate\_enabled) | Enable Lambda throttle rate monitor | `bool` | `false` | no | +| [throttle\_rate\_evaluation\_window](#input\_throttle\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [throttle\_rate\_no\_data\_window](#input\_throttle\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [throttle\_rate\_threshold\_critical](#input\_throttle\_rate\_threshold\_critical) | Critical threshold 
(percentage, 0-100) | `number` | `75` | no | +| [throttle\_rate\_threshold\_warning](#input\_throttle\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | +| [timeouts\_enabled](#input\_timeouts\_enabled) | Enable timeout count monitor | `bool` | `false` | no | +| [timeouts\_evaluation\_window](#input\_timeouts\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [timeouts\_no\_data\_window](#input\_timeouts\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [timeouts\_threshold\_critical](#input\_timeouts\_threshold\_critical) | Critical threshold (count) | `number` | `75` | no | +| [timeouts\_threshold\_warning](#input\_timeouts\_threshold\_warning) | Warning threshold (count) | `number` | `25` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | diff --git a/aws/rds/README.md b/aws/rds/README.md index cc05203..00f32b6 100644 --- a/aws/rds/README.md +++ b/aws/rds/README.md @@ -21,7 +21,7 @@ Configures the following for RDS databases based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.37.0 | ## Modules @@ -31,10 +31,10 @@ No modules. 
| Name | Type | |------|------| -| [datadog_monitor.http_5xx_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.http_5xx_tg_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.latency](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.no_healthy_instances](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.connection_count_anomaly](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.cpu_utilization](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.cpu_utilization_anomaly](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.used_storage](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | ## Inputs @@ -44,33 +44,39 @@ No modules. | [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | -| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:rds"
]
| no | +| [connection\_count\_anomaly\_deviations](#input\_connection\_count\_anomaly\_deviations) | Standard deviations | `number` | `3` | no | +| [connection\_count\_anomaly\_enabled](#input\_connection\_count\_anomaly\_enabled) | Enable connection count anomaly monitor | `bool` | `false` | no | +| [connection\_count\_anomaly\_evaluation\_window](#input\_connection\_count\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [connection\_count\_anomaly\_no\_data\_window](#input\_connection\_count\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [connection\_count\_anomaly\_recovery\_window](#input\_connection\_count\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| [connection\_count\_anomaly\_rollup](#input\_connection\_count\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonality) | `number` | `60` | no | +| [connection\_count\_anomaly\_seasonality](#input\_connection\_count\_anomaly\_seasonality) | Seasonality (hourly, daily, weekly) | `string` | `"weekly"` | no | +| [connection\_count\_anomaly\_threshold\_critical](#input\_connection\_count\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | +| [connection\_count\_anomaly\_threshold\_warning](#input\_connection\_count\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | +| [connection\_count\_anomaly\_trigger\_window](#input\_connection\_count\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit 
tag) | `string` | `null` | no | +| [cpu\_utilization\_anomaly\_deviations](#input\_cpu\_utilization\_anomaly\_deviations) | Standard deviations | `number` | `4` | no | +| [cpu\_utilization\_anomaly\_enabled](#input\_cpu\_utilization\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_anomaly\_evaluation\_window](#input\_cpu\_utilization\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_no\_data\_window](#input\_cpu\_utilization\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cpu\_utilization\_anomaly\_recovery\_window](#input\_cpu\_utilization\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| [cpu\_utilization\_anomaly\_rollup](#input\_cpu\_utilization\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonality) | `number` | `60` | no | +| [cpu\_utilization\_anomaly\_seasonality](#input\_cpu\_utilization\_anomaly\_seasonality) | Seasonality (hourly, daily, weekly) | `string` | `"weekly"` | no | +| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | +| [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | +| [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization 
monitor | `bool` | `false` | no | +| [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | +| [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | 
-| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | @@ -84,6 +90,11 @@ No modules. 
| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [used\_storage\_enabled](#input\_used\_storage\_enabled) | Enable used storage monitor | `bool` | `true` | no | +| [used\_storage\_evaluation\_window](#input\_used\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| [used\_storage\_no\_data\_window](#input\_used\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [used\_storage\_threshold\_critical](#input\_used\_storage\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `90` | no | +| [used\_storage\_threshold\_warning](#input\_used\_storage\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `80` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | ## Outputs diff --git a/aws/sqs/README.md b/aws/sqs/README.md index 78b8d6e..10fdd7c 100644 --- a/aws/sqs/README.md +++ b/aws/sqs/README.md @@ -18,7 +18,7 @@ Configures the following for Lambda functions based on tag matches: | Name | Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.44.0 | ## Modules @@ -28,10 +28,8 @@ No modules. 
| Name | Type | |------|------| -| [datadog_monitor.http_5xx_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.http_5xx_tg_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.latency](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.no_healthy_instances](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.oldest_message](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.queue_depth](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | ## Inputs @@ -41,39 +39,30 @@ No modules. | [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | -| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:queue"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` 
| no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [oldest\_message\_enabled](#input\_oldest\_message\_enabled) | Enable oldest queued message monitor | `bool` | `false` | no | 
+| [oldest\_message\_evaluation\_window](#input\_oldest\_message\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [oldest\_message\_no\_data\_window](#input\_oldest\_message\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [oldest\_message\_threshold\_critical](#input\_oldest\_message\_threshold\_critical) | Critical threshold (seconds) | `number` | `75` | no | +| [oldest\_message\_threshold\_warning](#input\_oldest\_message\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [queue\_depth\_enabled](#input\_queue\_depth\_enabled) | Enable queue depth count monitor | `bool` | `false` | no | +| [queue\_depth\_evaluation\_window](#input\_queue\_depth\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [queue\_depth\_no\_data\_window](#input\_queue\_depth\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [queue\_depth\_threshold\_critical](#input\_queue\_depth\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [queue\_depth\_threshold\_warning](#input\_queue\_depth\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/vpn/README.md b/aws/vpn/README.md index 06a3bb5..1f355c3 100644 --- a/aws/vpn/README.md +++ b/aws/vpn/README.md @@ -15,7 +15,7 @@ Configures up/down monitoring for VPN tunnels | Name | 
Version | |------|---------| -| [datadog](#provider\_datadog) | >= 3.37 | +| [datadog](#provider\_datadog) | 3.44.0 | ## Modules @@ -25,10 +25,7 @@ No modules. | Name | Type | |------|------| -| [datadog_monitor.http_5xx_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.http_5xx_tg_responses](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.latency](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | -| [datadog_monitor.no_healthy_instances](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.tunnel_state](https://registry.terraform.io/providers/datadog/datadog/latest/docs/resources/monitor) | resource | ## Inputs @@ -38,33 +35,14 @@ No modules. | [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | -| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | +| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:vpn"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | -| [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | -| [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` 
| no | -| [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | -| [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | -| [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | -| [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | -| [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | -| [no\_healthy\_instances\_enabled](#input\_no\_healthy\_instances\_enabled) | Enable no healthy instances monitor | `bool` | `true` | no | -| [no\_healthy\_instances\_evaluation\_window](#input\_no\_healthy\_instances\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | -| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage, 0 to disable) | `number` | `0` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | @@ -78,6 +56,9 @@ No modules. 
| [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | +| [tunnel\_state\_enabled](#input\_tunnel\_state\_enabled) | Enable VPN tunnel state monitor | `bool` | `false` | no | +| [tunnel\_state\_evaluation\_window](#input\_tunnel\_state\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | +| [tunnel\_state\_no\_data\_window](#input\_tunnel\_state\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | ## Outputs From bb521753f85c44db047ed5b76cd8c9ac57389a6d Mon Sep 17 00:00:00 2001 From: Kevin Date: Thu, 26 Sep 2024 13:41:54 -0400 Subject: [PATCH 02/10] Make env optional --- aws/alb/README.md | 2 +- aws/apigateway/README.md | 2 +- aws/beanstalk/README.md | 2 +- aws/ec2/README.md | 2 +- aws/ecs-cluster/README.md | 2 +- aws/ecs-fargate/README.md | 2 +- aws/ecs-service/README.md | 2 +- aws/elasticache/README.md | 2 +- aws/elasticsearch/README.md | 2 +- aws/elb/README.md | 2 +- aws/lambda/README.md | 2 +- aws/rds/README.md | 2 +- aws/sqs/README.md | 2 +- aws/vpn/README.md | 2 +- common/common.tf | 1 + 15 files changed, 15 insertions(+), 14 deletions(-) diff --git a/aws/alb/README.md b/aws/alb/README.md index 29522da..f3df56a 100644 --- a/aws/alb/README.md +++ b/aws/alb/README.md @@ -46,7 +46,7 @@ No modules. 
| [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:alb"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/apigateway/README.md b/aws/apigateway/README.md index 2596f1f..5acf4df 100644 --- a/aws/apigateway/README.md +++ b/aws/apigateway/README.md @@ -42,7 +42,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:apigateway"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/beanstalk/README.md b/aws/beanstalk/README.md index b2527a1..15156eb 100644 --- a/aws/beanstalk/README.md +++ b/aws/beanstalk/README.md @@ -46,7 +46,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:beanstalk"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [health\_enabled](#input\_health\_enabled) | Enable Beanstalk health monitor (requires enhanced metrics) | `bool` | `false` | no | | [health\_evaluation\_window](#input\_health\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`) | `string` | `"last_5m"` | no | diff --git a/aws/ec2/README.md b/aws/ec2/README.md index 0051be3..de24d42 100644 --- a/aws/ec2/README.md +++ b/aws/ec2/README.md @@ -43,7 +43,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:ec2"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | diff --git a/aws/ecs-cluster/README.md b/aws/ecs-cluster/README.md index a118a66..99e2faf 100644 --- a/aws/ecs-cluster/README.md +++ b/aws/ecs-cluster/README.md @@ -65,7 +65,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [memory\_reservation\_enabled](#input\_memory\_reservation\_enabled) | Enable cluster memory reservation monitor | `bool` | `false` | no | | [memory\_reservation\_evaluation\_window](#input\_memory\_reservation\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | diff --git a/aws/ecs-fargate/README.md b/aws/ecs-fargate/README.md index 31f78c5..eefe028 100644 --- a/aws/ecs-fargate/README.md +++ b/aws/ecs-fargate/README.md @@ -60,7 +60,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [fargate\_check\_enabled](#input\_fargate\_check\_enabled) | Enable Fargate check monitor | `bool` | `false` | no | | [fargate\_check\_evaluation\_window](#input\_fargate\_check\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/ecs-service/README.md b/aws/ecs-service/README.md index 2c561e9..daa61ab 100644 --- a/aws/ecs-service/README.md +++ b/aws/ecs-service/README.md @@ -60,7 +60,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `string` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [memory\_utilization\_enabled](#input\_memory\_utilization\_enabled) | Enable Fargate task memory utilization monitor | `bool` | `false` | no | | [memory\_utilization\_evaluation\_window](#input\_memory\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | diff --git a/aws/elasticache/README.md b/aws/elasticache/README.md index 7bc29d2..086b2e9 100644 --- a/aws/elasticache/README.md +++ b/aws/elasticache/README.md @@ -68,7 +68,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [evictions\_enabled](#input\_evictions\_enabled) | Enable eviction rate monitor | `bool` | `false` | no | | [evictions\_evaluation\_window](#input\_evictions\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/elasticsearch/README.md b/aws/elasticsearch/README.md index af4b2f6..82c49fc 100644 --- a/aws/elasticsearch/README.md +++ b/aws/elasticsearch/README.md @@ -68,7 +68,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `0.9` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `0.8` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `true` | no | | [free\_storage\_evaluation\_window](#input\_free\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/elb/README.md b/aws/elb/README.md index 6a0756a..776a272 100644 --- a/aws/elb/README.md +++ b/aws/elb/README.md @@ -46,7 +46,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:lb"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [http\_5xx\_backend\_responses\_enabled](#input\_http\_5xx\_backend\_responses\_enabled) | Enable HTTP 5xx response monitor (backend) | `bool` | `false` | no | | [http\_5xx\_backend\_responses\_evaluation\_window](#input\_http\_5xx\_backend\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | diff --git a/aws/lambda/README.md b/aws/lambda/README.md index f07e953..a7403c1 100644 --- a/aws/lambda/README.md +++ b/aws/lambda/README.md @@ -57,7 +57,7 @@ No modules. 
| [cold\_starts\_threshold\_warning](#input\_cold\_starts\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [error\_rate\_enabled](#input\_error\_rate\_enabled) | Enable Lambda error rate monitor | `bool` | `false` | no | | [error\_rate\_evaluation\_window](#input\_error\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [error\_rate\_no\_data\_window](#input\_error\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | diff --git a/aws/rds/README.md b/aws/rds/README.md index 00f32b6..2f3e192 100644 --- a/aws/rds/README.md +++ b/aws/rds/README.md @@ -72,7 +72,7 @@ No modules. 
| [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | diff --git a/aws/sqs/README.md b/aws/sqs/README.md index 10fdd7c..abd566f 100644 --- a/aws/sqs/README.md +++ b/aws/sqs/README.md @@ -42,7 +42,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:queue"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | diff --git a/aws/vpn/README.md b/aws/vpn/README.md index 1f355c3..9b9bab1 100644 --- a/aws/vpn/README.md +++ b/aws/vpn/README.md @@ -38,7 +38,7 @@ No modules. | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:vpn"
]
| no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | -| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | n/a | yes | +| [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | diff --git a/common/common.tf b/common/common.tf index 14461a1..3ef2ead 100644 --- a/common/common.tf +++ b/common/common.tf @@ -10,6 +10,7 @@ variable "cost_center" { } variable "env" { + default = null description = "Environment the monitored resource is in (leave blank to omit tag)" type = string } From e9ac59dca675e5a91a8d045ba4391d1a3071a7c5 Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 1 Oct 2024 12:26:37 -0400 Subject: [PATCH 03/10] Changing defaults --- aws/alb/main.tf | 4 ++-- aws/ecs-fargate/variables.tf | 2 +- aws/ecs-service/variables.tf | 8 ++++---- aws/elasticsearch/README.md | 8 ++++---- aws/elasticsearch/variables.tf | 12 ++++++------ aws/lambda/variables.tf | 10 +++++----- aws/rds/variables.tf | 4 ++-- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/aws/alb/main.tf b/aws/alb/main.tf index 30458f7..85d6c4d 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -72,7 +72,7 @@ END resource "datadog_monitor" "latency" { count 
= var.latency_enabled ? 1 : 0 - name = join("", [local.title_prefix, "{{loadbalancer.name}} ALB latency - {{value}}s ", local.title_suffix]) + name = join("", [local.title_prefix, "ALB latency - {{loadbalancer.name}} {{value}}s ", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -101,7 +101,7 @@ END resource "datadog_monitor" "no_healthy_instances" { count = var.no_healthy_instances_enabled ? 1 : 0 - name = join("", [local.title_prefix, "{{loadbalancer.name}} ALB healthy instances is at {{value}}%", local.title_suffix]) + name = join("", [local.title_prefix, "ALB available healthy instances - {{loadbalancer.name}} {{value}}%", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) diff --git a/aws/ecs-fargate/variables.tf b/aws/ecs-fargate/variables.tf index 844ddb0..bced0a0 100644 --- a/aws/ecs-fargate/variables.tf +++ b/aws/ecs-fargate/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # Fargate Agent Status ######################################## variable "fargate_check_enabled" { - default = false + default = true description = "Enable Fargate check monitor" type = bool } diff --git a/aws/ecs-service/variables.tf b/aws/ecs-service/variables.tf index ba8fd6e..cd07eb5 100644 --- a/aws/ecs-service/variables.tf +++ b/aws/ecs-service/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # ECS service running tasks ######################################## variable "running_tasks_enabled" { - default = false + default = true description = "Enable running tasks monitor" type = bool } @@ -35,7 +35,7 @@ variable "running_tasks_no_data_window" { } variable "running_tasks_threshold_critical" { - default = 0.25 + default = 0.50 description = "Critical threshold (percentage)" type = number } @@ -50,7 +50,7 @@ variable "running_tasks_threshold_warning" { # Service CPU 
Utilization ######################################## variable "cpu_utilization_enabled" { - default = false + default = true description = "Enable Fargate task CPU utilization monitor" type = bool } @@ -131,7 +131,7 @@ variable "cpu_utilization_anomaly_trigger_window" { } variable "cpu_utilization_anomaly_threshold_critical" { - default = null + default = 0.75 description = "Critical threshold (percent)" type = number } diff --git a/aws/elasticsearch/README.md b/aws/elasticsearch/README.md index 82c49fc..ddac429 100644 --- a/aws/elasticsearch/README.md +++ b/aws/elasticsearch/README.md @@ -45,10 +45,10 @@ No modules. | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:elasticsearch"
]
| no | -| [cluster\_health\_red\_enabled](#input\_cluster\_health\_red\_enabled) | Enable cluster health\_red monitor | `bool` | `false` | no | +| [cluster\_health\_red\_enabled](#input\_cluster\_health\_red\_enabled) | Enable cluster health\_red monitor | `bool` | `true` | no | | [cluster\_health\_red\_evaluation\_window](#input\_cluster\_health\_red\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_red\_no\_data\_window](#input\_cluster\_health\_red\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [cluster\_health\_yellow\_enabled](#input\_cluster\_health\_yellow\_enabled) | Enable cluster health monitor | `bool` | `false` | no | +| [cluster\_health\_yellow\_enabled](#input\_cluster\_health\_yellow\_enabled) | Enable cluster health monitor | `bool` | `true` | no | | [cluster\_health\_yellow\_evaluation\_window](#input\_cluster\_health\_yellow\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_yellow\_no\_data\_window](#input\_cluster\_health\_yellow\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | @@ -73,8 +73,8 @@ No modules. 
| [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `true` | no | | [free\_storage\_evaluation\_window](#input\_free\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [free\_storage\_no\_data\_window](#input\_free\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold (GB) | `number` | `null` | no | -| [free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold (GB) | `number` | `null` | no | +| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold (%) | `number` | `90` | no | +| [free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold (%) | `number` | `80` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | diff --git a/aws/elasticsearch/variables.tf b/aws/elasticsearch/variables.tf index 971cdd4..166adfe 100644 --- a/aws/elasticsearch/variables.tf +++ b/aws/elasticsearch/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # ElasticSearch cluster health (red) ######################################## variable "cluster_health_red_enabled" { - default = false + default = true description = "Enable cluster health_red monitor" type = bool } @@ -38,7 +38,7 @@ variable "cluster_health_red_no_data_window" { # ElasticSearch cluster health (yellow) ######################################## variable "cluster_health_yellow_enabled" { - default = false + default = true description = "Enable cluster health monitor" type = bool } @@ -173,13 +173,13 @@ variable "free_storage_evaluation_window" { } variable "free_storage_threshold_critical" { - default = null - description = "Critical threshold (GB)" + default = 90 + description = "Critical threshold for used disk space (%)" type = number } variable "free_storage_threshold_warning" { - default = null - description = "Warning threshold (GB)" + default = 80 + description = "Warning threshold for used disk space (%)" type = number } diff --git a/aws/lambda/variables.tf b/aws/lambda/variables.tf index 8aa64cc..c9eaa99 100644 --- a/aws/lambda/variables.tf +++ b/aws/lambda/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # Lambda error rate ######################################## variable "error_rate_enabled" { - default = false + default = true description = "Enable Lambda error rate monitor" type = bool } @@ -50,7 +50,7 @@ variable "error_rate_threshold_warning" { # Lambda timeouts ######################################## variable "timeouts_enabled" { - default = false + default = true description = "Enable timeout count monitor" type = 
bool } @@ -116,7 +116,7 @@ variable "cold_starts_threshold_warning" { # OOM monitor ######################################## variable "out_of_memory_enabled" { - default = false + default = true description = "Enable out of memory monitor (requires enhanced metrics)" type = bool } @@ -134,7 +134,7 @@ variable "out_of_memory_no_data_window" { } variable "out_of_memory_threshold_critical" { - default = null + default = 5 description = "Critical threshold (count)" type = number } @@ -203,7 +203,7 @@ variable "iterator_age_forecast_no_data_window" { # Lambda throttle rate ######################################## variable "throttle_rate_enabled" { - default = false + default = true description = "Enable Lambda throttle rate monitor" type = bool } diff --git a/aws/rds/variables.tf b/aws/rds/variables.tf index 6e74aa4..fe8cabe 100644 --- a/aws/rds/variables.tf +++ b/aws/rds/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # Connection Rate (anomaly detection) ######################################## variable "connection_count_anomaly_enabled" { - default = false + default = true description = "Enable CPU utilization anomaly monitor" type = bool } @@ -80,7 +80,7 @@ variable "connection_count_anomaly_threshold_warning" { # Node CPU Utilization ######################################## variable "cpu_utilization_enabled" { - default = false + default = true description = "Enable CPU utilization monitor" type = bool } From 9c677794390a16ba8e9138e630bb7a801a5a3022 Mon Sep 17 00:00:00 2001 From: Kevin Date: Wed, 2 Oct 2024 11:49:13 -0400 Subject: [PATCH 04/10] add datadog_critical, fix monitor names --- aws/alb/main.tf | 16 ++++++++-------- aws/apigateway/main.tf | 10 +++++----- aws/beanstalk/main.tf | 18 +++++++++--------- aws/ec2/main.tf | 8 ++++---- aws/ecs-cluster/main.tf | 8 ++++---- aws/ecs-fargate/main.tf | 14 +++++++------- aws/ecs-service/main.tf | 10 +++++----- aws/elasticache/main.tf | 14 +++++++------- aws/elasticsearch/main.tf | 14 +++++++------- aws/elb/main.tf 
| 24 ++++++++++++------------ aws/lambda/main.tf | 14 +++++++------- aws/rds/main.tf | 10 +++++----- aws/rds/variables.tf | 2 +- aws/sqs/main.tf | 4 ++-- aws/vpn/main.tf | 2 +- 15 files changed, 84 insertions(+), 84 deletions(-) diff --git a/aws/alb/main.tf b/aws/alb/main.tf index 85d6c4d..fb8d49f 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -57,8 +57,8 @@ resource "datadog_monitor" "http_5xx_tg_responses" { query = < ${var.http_5xx_tg_responses_threshold_critical} END @@ -88,7 +88,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END @@ -117,9 +117,9 @@ resource "datadog_monitor" "no_healthy_instances" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -41,7 +41,7 @@ END resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 - name = join("", [local.title_prefix, "API Gateway latency - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "API Gateway latency - {{apiname.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -57,7 +57,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END diff --git a/aws/beanstalk/main.tf b/aws/beanstalk/main.tf index f55018b..8f907f9 100644 --- a/aws/beanstalk/main.tf +++ b/aws/beanstalk/main.tf @@ -24,7 +24,7 @@ locals { resource "datadog_monitor" "health" { count = var.health_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "Beanstalk Health Events - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "Beanstalk Health Events - {{environmentname.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -40,7 +40,7 @@ resource "datadog_monitor" "health" { query = <= ${var.health_threshold_critical} END @@ -53,7 +53,7 @@ END resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ALB 5xx Responses - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "ALB 5xx Responses - {{environmentname.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -69,8 +69,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -83,7 +83,7 @@ END resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 - name = join("", [local.title_prefix, "Beanstalk Latency - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "Beanstalk Latency - {{environmentname.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -98,7 +98,7 @@ resource "datadog_monitor" "latency" { timeout_h = var.timeout_h query = <= ${var.latency_threshold_critical} END @@ -111,7 +111,7 @@ END resource "datadog_monitor" "root_disk_usage" { count = var.root_disk_usage_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "Beanstalk Instance Root Disk Usage - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "Beanstalk Instance Root Disk Usage - {{environmentname.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -127,7 +127,7 @@ resource "datadog_monitor" "root_disk_usage" { query = <= ${var.root_disk_usage_threshold_critical} END diff --git a/aws/ec2/main.tf b/aws/ec2/main.tf index 3a75582..4c856e7 100644 --- a/aws/ec2/main.tf +++ b/aws/ec2/main.tf @@ -26,7 +26,7 @@ resource "datadog_monitor" "status_failed_check" { query = <= 1 END @@ -53,7 +53,7 @@ resource "datadog_monitor" "status_failed_instance" { query = <= 1 END @@ -80,7 +80,7 @@ resource "datadog_monitor" "status_failed_system" { query = <= 1 END @@ -107,7 +107,7 @@ resource "datadog_monitor" "status_failed_volume" { query = <= 1 END diff --git a/aws/ecs-cluster/main.tf b/aws/ecs-cluster/main.tf index 82da113..879c368 100644 --- a/aws/ecs-cluster/main.tf +++ b/aws/ecs-cluster/main.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "agent_status" { timeout_h = var.timeout_h query = < ${var.cpu_utilization_threshold_critical} END @@ -84,7 +84,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -120,7 +120,7 @@ resource "datadog_monitor" "memory_reservation" { query = < ${var.memory_reservation_threshold_critical} END diff --git a/aws/ecs-fargate/main.tf b/aws/ecs-fargate/main.tf index 7bd1431..4ff73b6 100644 --- a/aws/ecs-fargate/main.tf +++ b/aws/ecs-fargate/main.tf @@ -40,7 +40,7 @@ END resource "datadog_monitor" "cpu_utilization" { count = var.cpu_utilization_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "ECS Fargate task CPU utilization", local.title_suffix]) + name = join("", [local.title_prefix, "ECS Fargate task CPU utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -56,7 +56,7 @@ resource "datadog_monitor" "cpu_utilization" { query = < ${var.cpu_utilization_threshold_critical} END @@ -69,7 +69,7 @@ END resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity", local.title_suffix]) + name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -85,7 +85,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -105,7 +105,7 @@ END resource "datadog_monitor" "memory_utilization" { count = var.memory_utilization_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "ECS Fargate task memory utilization", local.title_suffix]) + name = join("", [local.title_prefix, "ECS Fargate task memory utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -121,8 +121,8 @@ resource "datadog_monitor" "memory_utilization" { query = <= ${var.memory_utilization_threshold_critical} END diff --git a/aws/ecs-service/main.tf b/aws/ecs-service/main.tf index 0365e9b..5107c84 100644 --- a/aws/ecs-service/main.tf +++ b/aws/ecs-service/main.tf @@ -28,8 +28,8 @@ resource "datadog_monitor" "running_tasks" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -87,7 +87,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -123,7 +123,7 @@ resource "datadog_monitor" "memory_utilization" { query = <= ${var.memory_utilization_threshold_critical} END diff --git a/aws/elasticache/main.tf b/aws/elasticache/main.tf index 3f7c8a5..b2b5007 100644 --- a/aws/elasticache/main.tf +++ b/aws/elasticache/main.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -56,7 +56,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -86,7 +86,7 @@ resource "datadog_monitor" "evictions" { query = <= ${var.evictions_threshold_critical} END @@ -115,7 +115,7 @@ resource "datadog_monitor" "hit_rate" { query = <= ${var.hit_rate_threshold_critical} END @@ -144,7 +144,7 @@ resource "datadog_monitor" "hit_rate_anomaly" { query = <= ${var.hit_rate_anomaly_threshold_critical} @@ -174,7 +174,7 @@ resource "datadog_monitor" "max_connections" { query = <= ${var.max_connections_threshold_critical} END @@ -203,7 +203,7 @@ resource "datadog_monitor" "swap_usage" { query = <= 1 END @@ -55,7 
+55,7 @@ resource "datadog_monitor" "cluster_health_yellow" { query = <= 1 END @@ -83,7 +83,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -147,10 +147,10 @@ resource "datadog_monitor" "free_storage" { timeout_h = var.timeout_h query = < ${var.free_storage_threshold_critical} EOQ diff --git a/aws/elb/main.tf b/aws/elb/main.tf index 182c7e2..b1a0a9c 100644 --- a/aws/elb/main.tf +++ b/aws/elb/main.tf @@ -11,7 +11,7 @@ locals { resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ELB 5xx Responses - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "ELB 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -41,7 +41,7 @@ END resource "datadog_monitor" "http_5xx_backend_responses" { count = var.http_5xx_backend_responses_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ELB Backend 5xx Responses - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "ELB Backend 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -57,8 +57,8 @@ resource "datadog_monitor" "http_5xx_backend_responses" { query = < ${var.http_5xx_backend_responses_threshold_critical} END @@ -72,7 +72,7 @@ END resource "datadog_monitor" "latency" { count = var.latency_enabled ? 
1 : 0 - name = join("", [local.title_prefix, "ELB backend latency - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "ELB backend latency - {{loadbalancername.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -88,7 +88,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END @@ -101,7 +101,7 @@ END resource "datadog_monitor" "no_healthy_instances" { count = var.no_healthy_instances_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ALB healthy instances - {{host.name}}", local.title_suffix]) + name = join("", [local.title_prefix, "ALB healthy instances - {{loadbalancername.name}}", local.title_suffix]) include_tags = true message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) @@ -117,9 +117,9 @@ resource "datadog_monitor" "no_healthy_instances" { query = < ${var.error_rate_threshold_critical} END @@ -59,8 +59,8 @@ resource "datadog_monitor" "timeouts" { query = < ${var.timeouts_threshold_critical} END @@ -89,8 +89,8 @@ resource "datadog_monitor" "cold_starts" { query = < ${var.cold_starts_threshold_critical} END @@ -149,7 +149,7 @@ resource "datadog_monitor" "iterator_age" { query = < ${var.iterator_age_threshold_critical} END diff --git a/aws/rds/main.tf b/aws/rds/main.tf index bbb3292..0668c52 100644 --- a/aws/rds/main.tf +++ b/aws/rds/main.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "connection_count_anomaly" { query = <= ${var.connection_count_anomaly_threshold_critical} @@ -63,7 +63,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -92,7 +92,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -129,8 +129,8 @@ resource "datadog_monitor" "used_storage" { query = <= 
${var.used_storage_threshold_critical} END diff --git a/aws/rds/variables.tf b/aws/rds/variables.tf index fe8cabe..1ac8ab5 100644 --- a/aws/rds/variables.tf +++ b/aws/rds/variables.tf @@ -65,7 +65,7 @@ variable "connection_count_anomaly_trigger_window" { } variable "connection_count_anomaly_threshold_critical" { - default = null + default = 0.75 description = "Critical threshold (percent)" type = number } diff --git a/aws/sqs/main.tf b/aws/sqs/main.tf index edbfc91..579f9bd 100644 --- a/aws/sqs/main.tf +++ b/aws/sqs/main.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "oldest_message" { query = < ${var.oldest_message_threshold_critical} END @@ -56,7 +56,7 @@ resource "datadog_monitor" "queue_depth" { query = < ${var.queue_depth_threshold_critical} END diff --git a/aws/vpn/main.tf b/aws/vpn/main.tf index 304e91b..6d9bfc2 100644 --- a/aws/vpn/main.tf +++ b/aws/vpn/main.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "tunnel_state" { query = < Date: Thu, 3 Oct 2024 13:16:34 -0400 Subject: [PATCH 05/10] change to datadog_managed tag and don't include tags --- aws/alb/main.tf | 24 ++++++++++++------------ aws/apigateway/main.tf | 10 +++++----- aws/beanstalk/main.tf | 18 +++++++++--------- aws/ec2/main.tf | 16 ++++++++-------- aws/ecs-cluster/main.tf | 16 ++++++++-------- aws/ecs-fargate/main.tf | 16 ++++++++-------- aws/ecs-service/main.tf | 18 +++++++++--------- aws/elasticache/main.tf | 28 ++++++++++++++-------------- aws/elasticsearch/main.tf | 22 +++++++++++----------- aws/elb/main.tf | 24 ++++++++++++------------ aws/lambda/main.tf | 28 ++++++++++++++-------------- aws/rds/main.tf | 18 +++++++++--------- aws/sqs/main.tf | 8 ++++---- aws/vpn/main.tf | 4 ++-- 14 files changed, 125 insertions(+), 125 deletions(-) diff --git a/aws/alb/main.tf b/aws/alb/main.tf index fb8d49f..a771c77 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ALB 5xx Responses - {{loadbalancer.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -42,7 +42,7 @@ resource "datadog_monitor" "http_5xx_tg_responses" { count = var.http_5xx_tg_responses_enabled ? 1 : 0 name = join("", [local.title_prefix, "ALB Target Group 5xx Responses - {{loadbalancer.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -57,8 +57,8 @@ resource "datadog_monitor" "http_5xx_tg_responses" { query = < ${var.http_5xx_tg_responses_threshold_critical} END @@ -73,7 +73,7 @@ resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 name = join("", [local.title_prefix, "ALB latency - {{loadbalancer.name}} {{value}}s ", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -88,7 +88,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END @@ -102,7 +102,7 @@ resource "datadog_monitor" "no_healthy_instances" { count = var.no_healthy_instances_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ALB available healthy instances - {{loadbalancer.name}} {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -117,9 +117,9 @@ resource "datadog_monitor" "no_healthy_instances" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -42,7 +42,7 @@ resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 name = join("", [local.title_prefix, "API Gateway latency - {{apiname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -57,7 +57,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END diff --git a/aws/beanstalk/main.tf b/aws/beanstalk/main.tf index 8f907f9..26bbb60 100644 --- a/aws/beanstalk/main.tf +++ b/aws/beanstalk/main.tf @@ -25,7 +25,7 @@ resource "datadog_monitor" "health" { count = var.health_enabled ? 1 : 0 name = join("", [local.title_prefix, "Beanstalk Health Events - {{environmentname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "metric alert" @@ -40,7 +40,7 @@ resource "datadog_monitor" "health" { query = <= ${var.health_threshold_critical} END @@ -54,7 +54,7 @@ resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ALB 5xx Responses - {{environmentname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -69,8 +69,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -84,7 +84,7 @@ resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 name = join("", [local.title_prefix, "Beanstalk Latency - {{environmentname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -98,7 +98,7 @@ resource "datadog_monitor" "latency" { timeout_h = var.timeout_h query = <= ${var.latency_threshold_critical} END @@ -112,7 +112,7 @@ resource "datadog_monitor" "root_disk_usage" { count = var.root_disk_usage_enabled ? 1 : 0 name = join("", [local.title_prefix, "Beanstalk Instance Root Disk Usage - {{environmentname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -127,7 +127,7 @@ resource "datadog_monitor" "root_disk_usage" { query = <= ${var.root_disk_usage_threshold_critical} END diff --git a/aws/ec2/main.tf b/aws/ec2/main.tf index 4c856e7..7772c28 100644 --- a/aws/ec2/main.tf +++ b/aws/ec2/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "status_failed_check" { count = var.status_failed_check_enabled ? 
1 : 0 name = join("", [local.title_prefix, "EC2 instance status - status check failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -26,7 +26,7 @@ resource "datadog_monitor" "status_failed_check" { query = <= 1 END @@ -39,7 +39,7 @@ resource "datadog_monitor" "status_failed_instance" { count = var.status_failed_instance_enabled ? 1 : 0 name = join("", [local.title_prefix, "EC2 instance status - instance failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -53,7 +53,7 @@ resource "datadog_monitor" "status_failed_instance" { query = <= 1 END @@ -66,7 +66,7 @@ resource "datadog_monitor" "status_failed_system" { count = var.status_failed_system_enabled ? 1 : 0 name = join("", [local.title_prefix, "EC2 instance status - host failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -80,7 +80,7 @@ resource "datadog_monitor" "status_failed_system" { query = <= 1 END @@ -93,7 +93,7 @@ resource "datadog_monitor" "status_failed_volume" { count = var.status_failed_volume_enabled ? 
1 : 0 name = join("", [local.title_prefix, "EC2 instance status - volume failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -107,7 +107,7 @@ resource "datadog_monitor" "status_failed_volume" { query = <= 1 END diff --git a/aws/ecs-cluster/main.tf b/aws/ecs-cluster/main.tf index 879c368..28906b1 100644 --- a/aws/ecs-cluster/main.tf +++ b/aws/ecs-cluster/main.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "agent_status" { count = var.agent_status_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS Agent disconnected - {{clustername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "service check" @@ -27,7 +27,7 @@ resource "datadog_monitor" "agent_status" { timeout_h = var.timeout_h query = < ${var.cpu_utilization_threshold_critical} END @@ -69,7 +69,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS cluster CPU utilization anomalous activity - {{clustername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -84,7 +84,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -105,7 +105,7 @@ resource "datadog_monitor" "memory_reservation" { count = var.memory_reservation_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ECS Cluster Memory Reservation High - {{clustername.name}} - {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -120,7 +120,7 @@ resource "datadog_monitor" "memory_reservation" { query = < ${var.memory_reservation_threshold_critical} END diff --git a/aws/ecs-fargate/main.tf b/aws/ecs-fargate/main.tf index 4ff73b6..1425541 100644 --- a/aws/ecs-fargate/main.tf +++ b/aws/ecs-fargate/main.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "fargate_check" { count = var.fargate_check_enabled ? 1 : 0 name = join("", [local.title_prefix, "Fargate service not responding", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "service check" @@ -41,7 +41,7 @@ resource "datadog_monitor" "cpu_utilization" { count = var.cpu_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS Fargate task CPU utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -56,7 +56,7 @@ resource "datadog_monitor" "cpu_utilization" { query = < ${var.cpu_utilization_threshold_critical} END @@ -70,7 +70,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -85,7 +85,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -106,7 +106,7 @@ resource "datadog_monitor" "memory_utilization" { count = var.memory_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS Fargate task memory utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -121,8 +121,8 @@ resource "datadog_monitor" "memory_utilization" { query = <= ${var.memory_utilization_threshold_critical} END diff --git a/aws/ecs-service/main.tf b/aws/ecs-service/main.tf index 5107c84..225cb7e 100644 --- a/aws/ecs-service/main.tf +++ b/aws/ecs-service/main.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "running_tasks" { count = var.running_tasks_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS service failed tasks - {{servicename.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -28,8 +28,8 @@ resource "datadog_monitor" "running_tasks" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -72,7 +72,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity - {{servicename.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -87,7 +87,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -108,7 +108,7 @@ resource "datadog_monitor" "memory_utilization" { count = var.memory_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "ECS Service memory utilization - {{servicename.name}} - {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -123,7 +123,7 @@ resource "datadog_monitor" "memory_utilization" { query = <= ${var.memory_utilization_threshold_critical} END diff --git a/aws/elasticache/main.tf b/aws/elasticache/main.tf index b2b5007..5eb5688 100644 --- a/aws/elasticache/main.tf +++ b/aws/elasticache/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "cpu_utilization" { count = var.cpu_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "Elasticache CPU Utilization - {{cacheclusterid.name}} - {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -41,7 +41,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 
1 : 0 name = join("", [local.title_prefix, "Elasticache CPU utilization anomalous activity - {{cacheclusterid.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -56,7 +56,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -71,7 +71,7 @@ resource "datadog_monitor" "evictions" { count = var.evictions_enabled ? 1 : 0 name = join("", [local.title_prefix, "Elasticache evictions - {{cacheclusterid.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -86,7 +86,7 @@ resource "datadog_monitor" "evictions" { query = <= ${var.evictions_threshold_critical} END @@ -100,7 +100,7 @@ resource "datadog_monitor" "hit_rate" { count = var.hit_rate_enabled ? 1 : 0 name = join("", [local.title_prefix, "Elasticache cache hit rate - {{cacheclusterid.name}} - {{value}}% ", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -115,7 +115,7 @@ resource "datadog_monitor" "hit_rate" { query = <= ${var.hit_rate_threshold_critical} END @@ -129,7 +129,7 @@ resource "datadog_monitor" "hit_rate_anomaly" { count = var.hit_rate_anomaly_enabled ? 
1 : 0 name = join("", [local.title_prefix, "Elasticache cache hit rate anomalous activity - {{cacheclusterid.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -144,7 +144,7 @@ resource "datadog_monitor" "hit_rate_anomaly" { query = <= ${var.hit_rate_anomaly_threshold_critical} @@ -159,7 +159,7 @@ resource "datadog_monitor" "max_connections" { count = var.max_connections_enabled ? 1 : 0 name = join("", [local.title_prefix, "Elasticache max connections reached - {{cacheclusterid.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -174,7 +174,7 @@ resource "datadog_monitor" "max_connections" { query = <= ${var.max_connections_threshold_critical} END @@ -188,7 +188,7 @@ resource "datadog_monitor" "swap_usage" { count = var.swap_usage_enabled ? 1 : 0 name = join("", [local.title_prefix, "Elasticache swap usage - {{cacheclusterid.name}} - {{value}}MB", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -203,7 +203,7 @@ resource "datadog_monitor" "swap_usage" { query = <= 1 END @@ -40,7 +40,7 @@ resource "datadog_monitor" "cluster_health_yellow" { count = var.cluster_health_yellow_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ElasticSearch cluster health yellow - {{name.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -55,7 +55,7 @@ resource "datadog_monitor" "cluster_health_yellow" { query = <= 1 END @@ -68,7 +68,7 @@ resource "datadog_monitor" "cpu_utilization" { count = var.cpu_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "ElasticSearch CPU Utilization - {{name.name}} - {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -83,7 +83,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -97,7 +97,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 1 : 0 name = join("", [local.title_prefix, "ElasticSearch CPU utilization anomalous activity - {{name.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -133,7 +133,7 @@ resource "datadog_monitor" "free_storage" { count = var.free_storage_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ElasticSearch cluster storage - {{name.name}} - {{value}}% used", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -148,9 +148,9 @@ resource "datadog_monitor" "free_storage" { query = < ${var.free_storage_threshold_critical} EOQ diff --git a/aws/elb/main.tf b/aws/elb/main.tf index b1a0a9c..428a10f 100644 --- a/aws/elb/main.tf +++ b/aws/elb/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "http_5xx_responses" { count = var.http_5xx_responses_enabled ? 1 : 0 name = join("", [local.title_prefix, "ELB 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,8 +27,8 @@ resource "datadog_monitor" "http_5xx_responses" { query = < ${var.http_5xx_responses_threshold_critical} END @@ -42,7 +42,7 @@ resource "datadog_monitor" "http_5xx_backend_responses" { count = var.http_5xx_backend_responses_enabled ? 1 : 0 name = join("", [local.title_prefix, "ELB Backend 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -57,8 +57,8 @@ resource "datadog_monitor" "http_5xx_backend_responses" { query = < ${var.http_5xx_backend_responses_threshold_critical} END @@ -73,7 +73,7 @@ resource "datadog_monitor" "latency" { count = var.latency_enabled ? 
1 : 0 name = join("", [local.title_prefix, "ELB backend latency - {{loadbalancername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -88,7 +88,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} END @@ -102,7 +102,7 @@ resource "datadog_monitor" "no_healthy_instances" { count = var.no_healthy_instances_enabled ? 1 : 0 name = join("", [local.title_prefix, "ALB healthy instances - {{loadbalancername.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -117,9 +117,9 @@ resource "datadog_monitor" "no_healthy_instances" { query = < ${var.error_rate_threshold_critical} END @@ -44,7 +44,7 @@ resource "datadog_monitor" "timeouts" { count = var.timeouts_enabled ? 1 : 0 name = join("", [local.title_prefix, "Lambda timeouts - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -59,8 +59,8 @@ resource "datadog_monitor" "timeouts" { query = < ${var.timeouts_threshold_critical} END @@ -74,7 +74,7 @@ resource "datadog_monitor" "cold_starts" { count = var.cold_starts_enabled ? 
1 : 0 name = join("", [local.title_prefix, "Lambda cold starts - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -89,8 +89,8 @@ resource "datadog_monitor" "cold_starts" { query = < ${var.cold_starts_threshold_critical} END @@ -104,7 +104,7 @@ resource "datadog_monitor" "out_of_memory" { count = var.out_of_memory_enabled ? 1 : 0 name = join("", [local.title_prefix, "Lambda out of memory - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -134,7 +134,7 @@ resource "datadog_monitor" "iterator_age" { count = var.iterator_age_enabled ? 1 : 0 name = join("", [local.title_prefix, "Lambda iterator age - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -149,7 +149,7 @@ resource "datadog_monitor" "iterator_age" { query = < ${var.iterator_age_threshold_critical} END @@ -163,7 +163,7 @@ resource "datadog_monitor" "iterator_age_forecast" { count = var.iterator_age_forecast_enabled ? 1 : 0 name = join("", [local.title_prefix, "Lambda stream data loss forecasted - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -191,7 +191,7 @@ resource "datadog_monitor" "throttle_rate" { count = var.throttle_rate_enabled ? 
1 : 0 name = join("", [local.title_prefix, "Lambda throttle rate - {{functionname.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/rds/main.tf b/aws/rds/main.tf index 0668c52..4211915 100644 --- a/aws/rds/main.tf +++ b/aws/rds/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "connection_count_anomaly" { count = var.connection_count_anomaly_enabled ? 1 : 0 name = join("", [local.title_prefix, "RDS connection count anomalous activity - {{dbinstanceidentifier.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "connection_count_anomaly" { query = <= ${var.connection_count_anomaly_threshold_critical} @@ -48,7 +48,7 @@ resource "datadog_monitor" "cpu_utilization" { count = var.cpu_utilization_enabled ? 1 : 0 name = join("", [local.title_prefix, "RDS CPU Utilization - {{dbinstanceidentifier.name}} - {{value}}%", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -63,7 +63,7 @@ resource "datadog_monitor" "cpu_utilization" { query = <= ${var.cpu_utilization_threshold_critical} END @@ -77,7 +77,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { count = var.cpu_utilization_anomaly_enabled ? 
1 : 0 name = join("", [local.title_prefix, "RDS CPU utilization anomalous activity - {{dbinstanceidentifier.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -92,7 +92,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { query = <= ${var.cpu_utilization_anomaly_threshold_critical} @@ -113,7 +113,7 @@ resource "datadog_monitor" "used_storage" { count = var.used_storage_enabled ? 1 : 0 name = join("", [local.title_prefix, "RDS instance storage - {{dbinstanceidentifier.name}} - {{value}}% used", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -129,8 +129,8 @@ resource "datadog_monitor" "used_storage" { query = <= ${var.used_storage_threshold_critical} END diff --git a/aws/sqs/main.tf b/aws/sqs/main.tf index 579f9bd..db17ca6 100644 --- a/aws/sqs/main.tf +++ b/aws/sqs/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "oldest_message" { count = var.oldest_message_enabled ? 1 : 0 name = join("", [local.title_prefix, "Oldest queued message - {{queuename.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "oldest_message" { query = < ${var.oldest_message_threshold_critical} END @@ -41,7 +41,7 @@ resource "datadog_monitor" "queue_depth" { count = var.queue_depth_enabled ? 
1 : 0 name = join("", [local.title_prefix, "Queue depth - {{queuename.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -56,7 +56,7 @@ resource "datadog_monitor" "queue_depth" { query = < ${var.queue_depth_threshold_critical} END diff --git a/aws/vpn/main.tf b/aws/vpn/main.tf index 6d9bfc2..bd4df6a 100644 --- a/aws/vpn/main.tf +++ b/aws/vpn/main.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "tunnel_state" { count = var.tunnel_state_enabled ? 1 : 0 name = join("", [local.title_prefix, "VPN tunnel state - {{host.name}}", local.title_suffix]) - include_tags = true + include_tags = false message = local.query_alert_base_message tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -27,7 +27,7 @@ resource "datadog_monitor" "tunnel_state" { query = < Date: Wed, 16 Oct 2024 11:23:38 -0400 Subject: [PATCH 06/10] Add ability to turn off message on AWS monitors --- README.md | 2 +- aws/alb/main.tf | 10 +-- aws/alb/variables.tf | 24 +++++++ aws/apigateway/main.tf | 6 +- aws/apigateway/variables.tf | 12 ++++ aws/beanstalk/main.tf | 10 +-- aws/beanstalk/variables.tf | 24 +++++++ aws/ec2/main.tf | 10 +-- aws/ec2/variables.tf | 24 +++++++ aws/ecs-cluster/main.tf | 12 ++-- aws/ecs-cluster/variables.tf | 24 +++++++ aws/ecs-fargate/main.tf | 10 +-- aws/ecs-fargate/variables.tf | 24 +++++++ aws/ecs-service/main.tf | 10 +-- aws/ecs-service/variables.tf | 25 ++++++++ aws/elasticache/main.tf | 16 ++--- aws/elasticache/variables.tf | 42 ++++++++++++ aws/elasticsearch/main.tf | 16 ++--- aws/elasticsearch/variables.tf | 30 +++++++++ aws/elb/main.tf | 10 +-- aws/elb/variables.tf | 25 ++++++++ aws/lambda/main.tf | 16 ++--- aws/lambda/variables.tf | 42 ++++++++++++ aws/rds/main.tf | 10 +-- aws/rds/variables.tf | 24 +++++++ aws/sqs/.terraform.lock.hcl | 2 + aws/sqs/main.tf | 6 +- 
aws/sqs/variables.tf | 12 ++++ aws/vpn/.terraform.lock.hcl | 2 + common/common.tf | 114 +++++++++++++++++++++++++++------ 30 files changed, 503 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index 3c31d8a..80c209f 100644 --- a/README.md +++ b/README.md @@ -52,4 +52,4 @@ No inputs. ## Outputs No outputs. - \ No newline at end of file + diff --git a/aws/alb/main.tf b/aws/alb/main.tf index a771c77..2681e66 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "http_5xx_responses" { name = join("", [local.title_prefix, "ALB 5xx Responses - {{loadbalancer.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -43,7 +43,7 @@ resource "datadog_monitor" "http_5xx_tg_responses" { name = join("", [local.title_prefix, "ALB Target Group 5xx Responses - {{loadbalancer.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_tg_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -74,7 +74,7 @@ resource "datadog_monitor" "latency" { name = join("", [local.title_prefix, "ALB latency - {{loadbalancer.name}} {{value}}s ", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.latency_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -103,7 +103,7 @@ resource "datadog_monitor" "no_healthy_instances" { name = join("", [local.title_prefix, "ALB available healthy instances - {{loadbalancer.name}} {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.no_healthy_instances_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/alb/variables.tf b/aws/alb/variables.tf index 4d2eedf..0637625 100644 --- a/aws/alb/variables.tf +++ b/aws/alb/variables.tf @@ -46,6 +46,12 @@ variable "http_5xx_responses_threshold_warning" { type = number } +variable "http_5xx_responses_use_message" { + description = "Whether to use the query alert base message" + type = bool + default = false +} + ######################################## # HTTP 5xx Response Codes (Target Group) ######################################## @@ -79,6 +85,12 @@ variable "http_5xx_tg_responses_threshold_warning" { type = number } +variable "http_5xx_tg_responses_use_message" { + description = "Whether to use the query alert base message" + type = bool + default = false +} + ######################################## # Latency Instances ######################################## @@ -112,6 +124,12 @@ variable "latency_threshold_warning" { type = number } +variable "latency_use_message" { + description = "Whether to use the query alert base message" + type = bool + default = false +} + ######################################## # No Healthy Instances ######################################## @@ -144,3 +162,9 @@ variable "no_healthy_instances_threshold_warning" { description = "Warning threshold (percentage)" type = number } + +variable "no_healthy_instances_use_message" { + description = "Whether to use the query alert base message" + type = bool + default = true +} diff 
--git a/aws/apigateway/main.tf b/aws/apigateway/main.tf index 01ac1a7..f624851 100644 --- a/aws/apigateway/main.tf +++ b/aws/apigateway/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "http_5xx_responses" { name = join("", [local.title_prefix, "API Gateway 5xx Responses - {{apiname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -43,7 +43,7 @@ resource "datadog_monitor" "latency" { name = join("", [local.title_prefix, "API Gateway latency - {{apiname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.latency_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/apigateway/variables.tf b/aws/apigateway/variables.tf index 14d282a..d5eb215 100644 --- a/aws/apigateway/variables.tf +++ b/aws/apigateway/variables.tf @@ -46,6 +46,12 @@ variable "http_5xx_responses_threshold_warning" { type = number } +variable "http_5xx_responses_use_message" { + description = "Whether to use the query alert base message for HTTP 5xx responses monitor" + type = bool + default = false +} + ######################################## # Latency Instances ######################################## @@ -78,3 +84,9 @@ variable "latency_threshold_warning" { description = "Warning threshold (seconds)" type = number } + +variable "latency_use_message" { + description = "Whether to use the query alert base message for the latency monitor" + type = bool + default = false +} diff --git a/aws/beanstalk/main.tf b/aws/beanstalk/main.tf index 26bbb60..7fe3814 100644 --- a/aws/beanstalk/main.tf +++ b/aws/beanstalk/main.tf @@ -17,7 +17,7 @@ locals { latency_metric = local.latency_metric_map[var.latency_measurement] - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -26,7 +26,7 @@ resource "datadog_monitor" "health" { name = join("", [local.title_prefix, "Beanstalk Health Events - {{environmentname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.health_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "metric alert" @@ -55,7 +55,7 @@ resource "datadog_monitor" "http_5xx_responses" { name = join("", [local.title_prefix, "ALB 5xx Responses - {{environmentname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -85,7 +85,7 @@ resource "datadog_monitor" "latency" { name = join("", [local.title_prefix, "Beanstalk Latency - {{environmentname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.latency_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -113,7 +113,7 @@ resource "datadog_monitor" "root_disk_usage" { name = join("", [local.title_prefix, "Beanstalk Instance Root Disk Usage - {{environmentname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.root_disk_usage_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/beanstalk/variables.tf b/aws/beanstalk/variables.tf index 451d74d..c537346 100644 --- a/aws/beanstalk/variables.tf +++ b/aws/beanstalk/variables.tf @@ -68,6 +68,12 @@ Warning threshold ( END } +variable "health_use_message" { + description = "Whether to use the query alert base message for health monitor" + type = bool + default = false +} + ######################################## # HTTP 5xx Responses ######################################## @@ -101,6 +107,12 @@ variable "http_5xx_responses_threshold_warning" { type = number } +variable "http_5xx_responses_use_message" { + description = "Whether to use the query alert base message for HTTP 5xx responses monitor" + type = bool + default = false +} + ######################################## # Latency Instances ######################################## @@ -153,6 +165,12 @@ variable "latency_threshold_warning" { type = number } +variable "latency_use_message" { + description = "Whether to use the query alert base message for latency monitor" + type = bool + default = false +} + ######################################## # Root FS Disk Usage ######################################## @@ -185,3 +203,9 @@ variable "root_disk_usage_threshold_warning" { description = "Warning threshold (percent)" type = number } + +variable "root_disk_usage_use_message" { + description = "Whether to use the query alert base message for root disk usage monitor" + type = bool + default = false +} diff --git a/aws/ec2/main.tf b/aws/ec2/main.tf index 7772c28..337c979 100644 --- a/aws/ec2/main.tf +++ b/aws/ec2/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? 
"" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "status_failed_check" { name = join("", [local.title_prefix, "EC2 instance status - status check failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.status_failed_check_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -40,7 +40,7 @@ resource "datadog_monitor" "status_failed_instance" { name = join("", [local.title_prefix, "EC2 instance status - instance failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.status_failed_instance_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -67,7 +67,7 @@ resource "datadog_monitor" "status_failed_system" { name = join("", [local.title_prefix, "EC2 instance status - host failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.status_failed_system_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -94,7 +94,7 @@ resource "datadog_monitor" "status_failed_volume" { name = join("", [local.title_prefix, "EC2 instance status - volume failure - {{name.name}}({{instance_id.name}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.status_failed_volume_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/ec2/variables.tf b/aws/ec2/variables.tf index b27bf0d..6aaed78 100644 --- a/aws/ec2/variables.tf +++ b/aws/ec2/variables.tf @@ -34,6 +34,12 @@ variable "status_failed_check_no_data_window" { type = number } +variable "status_failed_check_use_message" { + description = "Whether to use the query alert base message for ec2 instance status check monitor" + type = bool + default = false +} + ######################################## # Instance status check ######################################## @@ -55,6 +61,12 @@ variable "status_failed_instance_no_data_window" { type = number } +variable "status_failed_instance_use_message" { + description = "Whether to use the query alert base message for instance status check monitor" + type = bool + default = false +} + ##################################### # system host status check ######################################## @@ -76,6 +88,12 @@ variable "status_failed_system_no_data_window" { type = number } +variable "status_failed_system_use_message" { + description = "Whether to use the query alert base message for instance system failure monitor" + type = bool + default = false +} + ##################################### # Attached volume status check ######################################## @@ -96,3 +114,9 @@ variable "status_failed_volume_no_data_window" { description = "No data threshold (in minutes, 0 to disable)" type = number } + +variable "status_failed_volume_use_message" { + description = "Whether to use the query alert base message for attached volume status monitor" + type = bool + default = false +} diff --git a/aws/ecs-cluster/main.tf b/aws/ecs-cluster/main.tf index 28906b1..60e5208 100644 --- a/aws/ecs-cluster/main.tf +++ b/aws/ecs-cluster/main.tf @@ -5,7 +5,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = 
"${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -14,9 +14,9 @@ resource "datadog_monitor" "agent_status" { name = join("", [local.title_prefix, "ECS Agent disconnected - {{clustername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.agent_status_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) - type = "service check" + type = "service check" evaluation_delay = var.evaluation_delay new_group_delay = var.new_group_delay @@ -41,7 +41,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "ECS Cluster CPU Utilization - {{clustername.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -70,7 +70,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "ECS cluster CPU utilization anomalous activity - {{clustername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -106,7 +106,7 @@ resource "datadog_monitor" "memory_reservation" { name = join("", [local.title_prefix, "ECS Cluster Memory Reservation High - {{clustername.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.memory_reservation_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/ecs-cluster/variables.tf b/aws/ecs-cluster/variables.tf index e6cd277..6671c12 100644 --- a/aws/ecs-cluster/variables.tf +++ b/aws/ecs-cluster/variables.tf @@ -46,6 +46,12 @@ variable "agent_status_threshold_warning" { type = number } +variable "agent_status_use_message" { + description = "Whether to use the query alert base message for agent status monitor" + type = bool + default = false +} + ######################################## # Cluster CPU Utilization ######################################## @@ -79,6 +85,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -142,6 +154,12 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } +variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # Cluster Memory Reservation ######################################## @@ -173,3 +191,9 @@ variable "memory_reservation_threshold_warning" { description = "Warning threshold (percent)" type = number } + +variable "memory_reservation_use_message" { + description = "Whether to use the query alert base message for memory reservation monitor" + type = bool + default = false +} diff --git a/aws/ecs-fargate/main.tf b/aws/ecs-fargate/main.tf index 1425541..5b192a1 100644 --- a/aws/ecs-fargate/main.tf +++ b/aws/ecs-fargate/main.tf @@ -5,7 +5,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = 
"${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -14,7 +14,7 @@ resource "datadog_monitor" "fargate_check" { name = join("", [local.title_prefix, "Fargate service not responding", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.fargate_check_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "service check" @@ -42,7 +42,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "ECS Fargate task CPU utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -71,7 +71,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -107,7 +107,7 @@ resource "datadog_monitor" "memory_utilization" { name = join("", [local.title_prefix, "ECS Fargate task memory utilization - {{ecs_cluster}} ({{task_family}})", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.memory_utilization_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/ecs-fargate/variables.tf b/aws/ecs-fargate/variables.tf index bced0a0..272f46e 100644 --- a/aws/ecs-fargate/variables.tf +++ b/aws/ecs-fargate/variables.tf @@ -52,6 +52,12 @@ variable "fargate_check_threshold_warning" { type = number } +variable "fargate_check_use_message" { + description = "Whether to use the query alert base message for Fargate check monitor" + type = bool + default = false +} + ######################################## # Fargate Task CPU Utilization ######################################## @@ -85,6 +91,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -148,6 +160,12 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } +variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # Fargate Task Memory Reservation ######################################## @@ -179,3 +197,9 @@ variable "memory_utilization_threshold_warning" { description = "Warning threshold (percent)" type = number } + +variable "memory_utilization_use_message" { + description = "Whether to use the query alert base message for memory utilization monitor" + type = bool + default = false +} diff --git a/aws/ecs-service/main.tf b/aws/ecs-service/main.tf index 225cb7e..677893b 100644 --- a/aws/ecs-service/main.tf +++ b/aws/ecs-service/main.tf @@ -5,7 +5,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = 
"${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -14,7 +14,7 @@ resource "datadog_monitor" "running_tasks" { name = join("", [local.title_prefix, "ECS service failed tasks - {{servicename.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.running_tasks_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -44,7 +44,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "ECS Service CPU Utilization - {{servicename.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -73,7 +73,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "ECS service CPU utilization anomalous activity - {{servicename.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -109,7 +109,7 @@ resource "datadog_monitor" "memory_utilization" { name = join("", [local.title_prefix, "ECS Service memory utilization - {{servicename.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.memory_utilization_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/ecs-service/variables.tf b/aws/ecs-service/variables.tf index cd07eb5..0c7baef 100644 --- a/aws/ecs-service/variables.tf +++ b/aws/ecs-service/variables.tf @@ -46,6 +46,12 @@ variable "running_tasks_threshold_warning" { type = number } +variable "running_tasks_use_message" { + description = "Whether to use the query alert base message for running tasks monitor" + type = bool + default = true +} + ######################################## # Service CPU Utilization ######################################## @@ -79,6 +85,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -142,6 +154,13 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } + +variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # Service Memory Reservation ######################################## @@ -173,3 +192,9 @@ variable "memory_utilization_threshold_warning" { description = "Warning threshold (percent)" type = number } + +variable "memory_utilization_use_message" { + description = "Whether to use the query alert base message for memory utilization monitor" + type = bool + default = false +} diff --git a/aws/elasticache/main.tf b/aws/elasticache/main.tf index 5eb5688..2ad69b1 100644 --- a/aws/elasticache/main.tf +++ b/aws/elasticache/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = 
"${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "Elasticache CPU Utilization - {{cacheclusterid.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -42,7 +42,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "Elasticache CPU utilization anomalous activity - {{cacheclusterid.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -72,7 +72,7 @@ resource "datadog_monitor" "evictions" { name = join("", [local.title_prefix, "Elasticache evictions - {{cacheclusterid.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.evictions_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -101,7 +101,7 @@ resource "datadog_monitor" "hit_rate" { name = join("", [local.title_prefix, "Elasticache cache hit rate - {{cacheclusterid.name}} - {{value}}% ", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.hit_rate_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -130,7 +130,7 @@ resource "datadog_monitor" "hit_rate_anomaly" { name = join("", [local.title_prefix, "Elasticache cache hit rate anomalous activity - {{cacheclusterid.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.hit_rate_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -160,7 +160,7 @@ resource "datadog_monitor" "max_connections" { name = join("", [local.title_prefix, "Elasticache max connections reached - {{cacheclusterid.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.max_connections_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -189,7 +189,7 @@ resource "datadog_monitor" "swap_usage" { name = join("", [local.title_prefix, "Elasticache swap usage - {{cacheclusterid.name}} - {{value}}MB", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.swap_usage_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/elasticache/variables.tf b/aws/elasticache/variables.tf index 0ef1148..da5dd70 100644 --- a/aws/elasticache/variables.tf +++ b/aws/elasticache/variables.tf @@ -46,6 +46,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -109,6 +115,12 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } +variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # Elasticache Evictions ######################################## @@ -142,6 +154,12 @@ variable "evictions_threshold_warning" { type = number } +variable "evictions_use_message" { + description = "Whether to use the query alert base message for evictions monitor" + type = bool + default = false +} + ######################################## # Cache hit rate ######################################## @@ -175,6 +193,12 @@ variable "hit_rate_threshold_warning" { type = number } +variable "hit_rate_use_message" { + description = "Whether to use the query alert base message for hit rate monitor" + type = bool + default = false +} + ######################################## # Cache hit rate (anomaly detection) ######################################## @@ -214,6 +238,12 @@ variable "hit_rate_anomaly_threshold_critical" { type = number } +variable "hit_rate_anomaly_use_message" { + description = "Whether to use the query alert base message for hit rate anomaly monitor" + type = bool + default = false +} + 
######################################## # Max Connections ######################################## @@ -247,6 +277,12 @@ variable "max_connections_threshold_warning" { type = number } +variable "max_connections_use_message" { + description = "Whether to use the query alert base message for max connections monitor" + type = bool + default = false +} + ######################################## # Swap usage (by node) ######################################## @@ -279,3 +315,9 @@ variable "swap_usage_threshold_warning" { description = "Warning threshold (bytes)" type = number } + +variable "swap_usage_use_message" { + description = "Whether to use the query alert base message for swap usage monitor" + type = bool + default = false +} diff --git a/aws/elasticsearch/main.tf b/aws/elasticsearch/main.tf index 8dab8dd..479754c 100644 --- a/aws/elasticsearch/main.tf +++ b/aws/elasticsearch/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "cluster_health_red" { name = join("", [local.title_prefix, "ElasticSearch cluster health red - {{name.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cluster_health_red_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -41,7 +41,7 @@ resource "datadog_monitor" "cluster_health_yellow" { name = join("", [local.title_prefix, "ElasticSearch cluster health yellow - {{name.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cluster_health_yellow_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -69,7 +69,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "ElasticSearch CPU Utilization - {{name.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -98,7 +98,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "ElasticSearch CPU utilization anomalous activity - {{name.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -134,7 +134,7 @@ resource "datadog_monitor" "free_storage" { name = join("", [local.title_prefix, "ElasticSearch cluster storage - {{name.name}} - {{value}}% used", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.free_storage_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -148,8 +148,8 @@ resource "datadog_monitor" "free_storage" { query = < ${var.free_storage_threshold_critical} EOQ diff --git a/aws/elasticsearch/variables.tf b/aws/elasticsearch/variables.tf index 166adfe..9cc6ab5 100644 --- a/aws/elasticsearch/variables.tf +++ b/aws/elasticsearch/variables.tf @@ -34,6 +34,12 @@ variable "cluster_health_red_no_data_window" { type = number } +variable "cluster_health_red_use_message" { + description = "Whether to use the query alert base message for cluster health red monitor" + type = bool + default = true +} + ####################################### # ElasticSearch cluster health (yellow) ######################################## @@ -55,6 +61,12 @@ variable "cluster_health_yellow_no_data_window" { type = number } +variable "cluster_health_yellow_use_message" { + description = "Whether to use the query alert base message for cluster health yellow monitor" + type = bool + default = false +} + ######################################## # Node CPU Utilization ######################################## @@ -88,6 +100,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -151,6 +169,12 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } +variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # ElasticSearch cluster free storage ######################################## @@ -183,3 +207,9 @@ variable 
"free_storage_threshold_warning" { description = "Warning threshold for used disk space (%)" type = number } + +variable "free_storage_use_message" { + description = "Whether to use the query alert base message for free storage monitor" + type = bool + default = true +} diff --git a/aws/elb/main.tf b/aws/elb/main.tf index 428a10f..dfce887 100644 --- a/aws/elb/main.tf +++ b/aws/elb/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "http_5xx_responses" { name = join("", [local.title_prefix, "ELB 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -43,7 +43,7 @@ resource "datadog_monitor" "http_5xx_backend_responses" { name = join("", [local.title_prefix, "ELB Backend 5xx Responses - {{loadbalancername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.http_5xx_backend_responses_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -74,7 +74,7 @@ resource "datadog_monitor" "latency" { name = join("", [local.title_prefix, "ELB backend latency - {{loadbalancername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.latency_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -103,7 +103,7 @@ resource "datadog_monitor" "no_healthy_instances" { name = join("", [local.title_prefix, "ALB healthy instances - {{loadbalancername.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.no_healthy_instances_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/elb/variables.tf b/aws/elb/variables.tf index d2235be..13357e9 100644 --- a/aws/elb/variables.tf +++ b/aws/elb/variables.tf @@ -46,6 +46,12 @@ variable "http_5xx_responses_threshold_warning" { type = number } +variable "http_5xx_responses_use_message" { + description = "Whether to use the query alert base message for HTTP 5xx responses monitor" + type = bool + default = false +} + ######################################## # HTTP 5xx Response Codes (backend) ######################################## @@ -79,6 +85,12 @@ variable "http_5xx_backend_responses_threshold_warning" { type = number } +variable "http_5xx_backend_responses_use_message" { + description = "Whether to use the query alert base message for HTTP 5xx backend responses monitor" + type = bool + default = false +} + ######################################## # Latency (backend) ######################################## @@ -112,6 +124,12 @@ variable "latency_threshold_warning" { type = number } +variable "latency_use_message" { + description = "Whether to use the query alert base message for latency monitor" + type = bool + default = false +} + ######################################## # No Healthy Instances ######################################## @@ -144,3 +162,10 @@ variable "no_healthy_instances_threshold_warning" { description = "Warning threshold (percentage)" type = number } + + +variable "no_healthy_instances_use_message" { + description = 
"Whether to use the query alert base message for no healthy instances monitor" + type = bool + default = true +} diff --git a/aws/lambda/main.tf b/aws/lambda/main.tf index b6ddf13..e37a8f4 100644 --- a/aws/lambda/main.tf +++ b/aws/lambda/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" cold_start_query_filter = local.query_filter == "{*}" ? "{cold_start:true}" : replace(local.query_filter, "{", "{cold_star:true,") @@ -15,7 +15,7 @@ resource "datadog_monitor" "error_rate" { name = join("", [local.title_prefix, "Lambda error rate - {{functionname.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.error_rate_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -45,7 +45,7 @@ resource "datadog_monitor" "timeouts" { name = join("", [local.title_prefix, "Lambda timeouts - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.timeouts_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -75,7 +75,7 @@ resource "datadog_monitor" "cold_starts" { name = join("", [local.title_prefix, "Lambda cold starts - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cold_starts_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -105,7 +105,7 @@ resource "datadog_monitor" "out_of_memory" { name = join("", [local.title_prefix, "Lambda out of memory - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.out_of_memory_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -135,7 +135,7 @@ resource "datadog_monitor" "iterator_age" { name = join("", [local.title_prefix, "Lambda iterator age - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.iterator_age_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -164,7 +164,7 @@ resource "datadog_monitor" "iterator_age_forecast" { name = join("", [local.title_prefix, "Lambda stream data loss forecasted - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.iterator_age_forecast_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -192,7 +192,7 @@ resource "datadog_monitor" "throttle_rate" { name = join("", [local.title_prefix, "Lambda throttle rate - {{functionname.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.throttle_rate_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/lambda/variables.tf b/aws/lambda/variables.tf index c9eaa99..4332d90 100644 --- a/aws/lambda/variables.tf +++ b/aws/lambda/variables.tf @@ -46,6 +46,12 @@ variable "error_rate_threshold_warning" { type = number } +variable "error_rate_use_message" { + description = "Whether to use the query alert base message for error rate monitor" + type = bool + default = true +} + ######################################## # Lambda timeouts ######################################## @@ -79,6 +85,12 @@ variable "timeouts_threshold_warning" { type = number } +variable "timeouts_use_message" { + description = "Whether to use the query alert base message for timeouts monitor" + type = bool + default = false +} + ######################################## # Cold start monitor ######################################## @@ -112,6 +124,12 @@ variable "cold_starts_threshold_warning" { type = number } +variable "cold_starts_use_message" { + description = "Whether to use the query alert base message for cold starts monitor" + type = bool + default = false +} + ######################################## # OOM monitor ######################################## @@ -145,6 +163,12 @@ variable "out_of_memory_threshold_warning" { type = number } +variable "out_of_memory_use_message" { + description = "Whether to use the query alert base message for out of memory monitor" + type = bool + default = false +} + ######################################## # Iterator Age monitor ######################################## @@ -178,6 +202,12 @@ variable "iterator_age_threshold_warning" { type = number } +variable "iterator_age_use_message" { + description = "Whether to use the query alert base message for iterator age monitor" + type = bool + default = false +} + ######################################## # Iterator Age forecast data loss ######################################## @@ 
-199,6 +229,12 @@ variable "iterator_age_forecast_no_data_window" { type = number } +variable "iterator_age_forecast_use_message" { + description = "Whether to use the query alert base message for iterator age forecast monitor" + type = bool + default = false +} + ######################################## # Lambda throttle rate ######################################## @@ -231,3 +267,9 @@ variable "throttle_rate_threshold_warning" { description = "Warning threshold (percentage, 0-100)" type = number } + +variable "throttle_rate_use_message" { + description = "Whether to use the query alert base message for throttle rate monitor" + type = bool + default = false +} diff --git a/aws/rds/main.tf b/aws/rds/main.tf index 4211915..c64956c 100644 --- a/aws/rds/main.tf +++ b/aws/rds/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? "" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "connection_count_anomaly" { name = join("", [local.title_prefix, "RDS connection count anomalous activity - {{dbinstanceidentifier.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.connection_count_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -49,7 +49,7 @@ resource "datadog_monitor" "cpu_utilization" { name = join("", [local.title_prefix, "RDS CPU Utilization - {{dbinstanceidentifier.name}} - {{value}}%", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_use_message ? 
local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -78,7 +78,7 @@ resource "datadog_monitor" "cpu_utilization_anomaly" { name = join("", [local.title_prefix, "RDS CPU utilization anomalous activity - {{dbinstanceidentifier.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.cpu_utilization_anomaly_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -114,7 +114,7 @@ resource "datadog_monitor" "used_storage" { name = join("", [local.title_prefix, "RDS instance storage - {{dbinstanceidentifier.name}} - {{value}}% used", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.used_storage_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/rds/variables.tf b/aws/rds/variables.tf index 1ac8ab5..64f2191 100644 --- a/aws/rds/variables.tf +++ b/aws/rds/variables.tf @@ -76,6 +76,12 @@ variable "connection_count_anomaly_threshold_warning" { type = number } +variable "connection_count_anomaly_use_message" { + description = "Whether to use the query alert base message for connection count anomaly monitor" + type = bool + default = true +} + ######################################## # Node CPU Utilization ######################################## @@ -109,6 +115,12 @@ variable "cpu_utilization_threshold_warning" { type = number } +variable "cpu_utilization_use_message" { + description = "Whether to use the query alert base message for CPU utilization monitor" + type = bool + default = false +} + ######################################## # CPU Utilization (anomaly detection) ######################################## @@ -172,6 +184,12 @@ variable "cpu_utilization_anomaly_threshold_warning" { type = number } 
+variable "cpu_utilization_anomaly_use_message" { + description = "Whether to use the query alert base message for CPU utilization anomaly monitor" + type = bool + default = false +} + ######################################## # ElasticSearch cluster used storage ######################################## @@ -204,3 +222,9 @@ variable "used_storage_threshold_warning" { description = "Warning threshold (percentage, 0-100)" type = number } + +variable "used_storage_use_message" { + description = "Whether to use the query alert base message for used storage monitor" + type = bool + default = true +} diff --git a/aws/sqs/.terraform.lock.hcl b/aws/sqs/.terraform.lock.hcl index 5fa8913..f4429ee 100644 --- a/aws/sqs/.terraform.lock.hcl +++ b/aws/sqs/.terraform.lock.hcl @@ -5,6 +5,7 @@ provider "registry.terraform.io/datadog/datadog" { version = "3.44.0" constraints = ">= 3.37.0" hashes = [ + "h1:gapxzCRcnTGm4HLO1zuoelGC15+0LEYceGNWGh69JLE=", "h1:neJ/si/8CotiW8ulfjU6dFmb1bpzbTjhfHLTlCvdynw=", "zh:12119fe0cafbe7e05c32d4101a804d479ae756e19512c789c67cb3c51420ac98", "zh:35267ecc27de00e449893df9a37481f38b8fe24d14fe94198cd68966f1aa586f", @@ -27,6 +28,7 @@ provider "registry.terraform.io/hashicorp/null" { version = "3.2.2" constraints = ">= 3.1.0" hashes = [ + "h1:IMVAUHKoydFrlPrl9OzasDnw/8ntZFerCC9iXw1rXQY=", "h1:vWAsYRd7MjYr3adj8BVKRohVfHpWQdvkIwUQ2Jf5FVM=", "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", diff --git a/aws/sqs/main.tf b/aws/sqs/main.tf index db17ca6..6c98447 100644 --- a/aws/sqs/main.tf +++ b/aws/sqs/main.tf @@ -4,7 +4,7 @@ locals { monitor_warn_default_priority = null monitor_nodata_default_priority = null - title_prefix = "${var.title_prefix == null ? "" : "[${var.title_prefix}]"}" + title_prefix = var.title_prefix == null ? "" : "[${var.title_prefix}]" title_suffix = var.title_suffix == null ? 
"" : " (${var.title_suffix})" } @@ -13,7 +13,7 @@ resource "datadog_monitor" "oldest_message" { name = join("", [local.title_prefix, "Oldest queued message - {{queuename.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.oldest_message_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" @@ -42,7 +42,7 @@ resource "datadog_monitor" "queue_depth" { name = join("", [local.title_prefix, "Queue depth - {{queuename.name}}", local.title_suffix]) include_tags = false - message = local.query_alert_base_message + message = var.queue_depth_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags) type = "query alert" diff --git a/aws/sqs/variables.tf b/aws/sqs/variables.tf index 0a4b1c5..4bb5de0 100644 --- a/aws/sqs/variables.tf +++ b/aws/sqs/variables.tf @@ -46,6 +46,12 @@ variable "oldest_message_threshold_warning" { type = number } +variable "oldest_message_use_message" { + description = "Whether to use the query alert base message for oldest message monitor" + type = bool + default = false +} + ######################################## # Lambda queue_depth ######################################## @@ -78,3 +84,9 @@ variable "queue_depth_threshold_warning" { description = "Warning threshold (count)" type = number } + +variable "queue_depth_use_message" { + description = "Whether to use the query alert base message for queue depth monitor" + type = bool + default = false +} diff --git a/aws/vpn/.terraform.lock.hcl b/aws/vpn/.terraform.lock.hcl index 5fa8913..f4429ee 100644 --- a/aws/vpn/.terraform.lock.hcl +++ b/aws/vpn/.terraform.lock.hcl @@ -5,6 +5,7 @@ provider "registry.terraform.io/datadog/datadog" { version = "3.44.0" constraints = ">= 3.37.0" hashes = [ + "h1:gapxzCRcnTGm4HLO1zuoelGC15+0LEYceGNWGh69JLE=", "h1:neJ/si/8CotiW8ulfjU6dFmb1bpzbTjhfHLTlCvdynw=", 
"zh:12119fe0cafbe7e05c32d4101a804d479ae756e19512c789c67cb3c51420ac98", "zh:35267ecc27de00e449893df9a37481f38b8fe24d14fe94198cd68966f1aa586f", @@ -27,6 +28,7 @@ provider "registry.terraform.io/hashicorp/null" { version = "3.2.2" constraints = ">= 3.1.0" hashes = [ + "h1:IMVAUHKoydFrlPrl9OzasDnw/8ntZFerCC9iXw1rXQY=", "h1:vWAsYRd7MjYr3adj8BVKRohVfHpWQdvkIwUQ2Jf5FVM=", "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", diff --git a/common/common.tf b/common/common.tf index 3ef2ead..6d90a63 100644 --- a/common/common.tf +++ b/common/common.tf @@ -161,6 +161,24 @@ variable "notify_recovery_override" { type = list(string) } +variable "notify_crit_override" { + default = [] + description = "List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise)" + type = list(string) +} + +variable "notify_nonprod_override" { + default = [] + description = "List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise)" + type = list(string) +} + +variable "notify_prod_override" { + default = [] + description = "List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise)" + type = list(string) +} + locals { # tag related locals @@ -214,14 +232,17 @@ locals { var.runbook_link != null ? 
["**Runbook**: ${var.runbook_link}"] : [] )) - alert_priority = coalesce(var.alert_critical_priority, local.monitor_alert_default_priority, "P2") - warn_priority = coalesce(var.alert_critical_priority, local.monitor_warn_default_priority, "P2") - nodata_priority = coalesce(var.alert_critical_priority, local.monitor_nodata_default_priority, "P2") + alert_priority = coalesce(var.alert_critical_priority, "P2") + warn_priority = coalesce(var.warn_priority, "P2") + nodata_priority = coalesce(var.alert_nodata_priority, "P2") notify_on_alert = join(" ", coalescelist(var.notify_alert_override, var.notify_default)) notify_on_warn = join(" ", coalescelist(var.notify_warn_override, var.notify_default)) notify_on_nodata = join(" ", coalescelist(var.notify_nodata_override, var.notify_default)) notify_on_recovery = join(" ", coalescelist(var.notify_recovery_override, var.notify_default)) + notify_on_crit = join(" ", coalescelist(var.notify_crit_override, var.notify_default)) + notify_on_nonprod = join(" ", coalescelist(var.notify_nonprod_override, var.notify_default)) + notify_on_prod = join(" ", coalescelist(var.notify_prod_override, var.notify_default)) log_alert_base_message = < Date: Wed, 16 Oct 2024 11:27:03 -0400 Subject: [PATCH 07/10] readme updates --- aws/alb/README.md | 7 +++++++ aws/apigateway/README.md | 5 +++++ aws/beanstalk/README.md | 7 +++++++ aws/ec2/README.md | 7 +++++++ aws/ecs-cluster/README.md | 7 +++++++ aws/ecs-fargate/README.md | 9 ++++++++- aws/ecs-service/README.md | 15 +++++++++++---- aws/elasticache/README.md | 10 ++++++++++ aws/elasticsearch/README.md | 12 ++++++++++-- aws/elb/README.md | 7 +++++++ aws/lambda/README.md | 20 +++++++++++++++----- aws/rds/README.md | 13 ++++++++++--- aws/sqs/README.md | 5 +++++ aws/vpn/README.md | 3 +++ 14 files changed, 112 insertions(+), 15 deletions(-) diff --git a/aws/alb/README.md b/aws/alb/README.md index f3df56a..039a2bb 100644 --- a/aws/alb/README.md +++ b/aws/alb/README.md @@ -53,16 +53,19 @@ No modules. 
| [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | | [http\_5xx\_tg\_responses\_enabled](#input\_http\_5xx\_tg\_responses\_enabled) | Enable HTTP 5xx response monitor (target group) | `bool` | `false` | no | | [http\_5xx\_tg\_responses\_evaluation\_window](#input\_http\_5xx\_tg\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_tg\_responses\_no\_data\_window](#input\_http\_5xx\_tg\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_tg\_responses\_threshold\_critical](#input\_http\_5xx\_tg\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_tg\_responses\_threshold\_warning](#input\_http\_5xx\_tg\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_tg\_responses\_use\_message](#input\_http\_5xx\_tg\_responses\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | 
`"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | @@ -71,10 +74,14 @@ No modules. 
| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [no\_healthy\_instances\_threshold\_critical](#input\_no\_healthy\_instances\_threshold\_critical) | Critical threshold (percentage) | `number` | `0` | no | | [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | +| [no\_healthy\_instances\_use\_message](#input\_no\_healthy\_instances\_use\_message) | Whether to use the query alert base message | `bool` | `true` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | 
[notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/apigateway/README.md b/aws/apigateway/README.md index 5acf4df..52cd15d 100644 --- a/aws/apigateway/README.md +++ b/aws/apigateway/README.md @@ -49,18 +49,23 @@ No modules. | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `0.75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `0.25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx responses monitor | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message for the latency 
monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` 
otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/beanstalk/README.md b/aws/beanstalk/README.md index 15156eb..84f314b 100644 --- a/aws/beanstalk/README.md +++ b/aws/beanstalk/README.md @@ -53,24 +53,30 @@ No modules. | [health\_no\_data\_window](#input\_health\_no\_data\_window) | No date threshold (minutes) | `number` | `20` | no | | [health\_threshold\_critical](#input\_health\_threshold\_critical) | Critical threshold (
0 = OK
1 = Info
5 = Unknown
10 = No data
15 = Warning
20 = Degraded
25 = Severe
) | `number` | `25` | no | | [health\_threshold\_warning](#input\_health\_threshold\_warning) | Warning threshold (
0 = OK
1 = Info
5 = Unknown
10 = No data
15 = Warning
20 = Degraded
25 = Severe
) | `number` | `20` | no | +| [health\_use\_message](#input\_health\_use\_message) | Whether to use the query alert base message for health monitor | `bool` | `false` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage) | `number` | `25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx responses monitor | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_measurement](#input\_latency\_measurement) | Latency Measurement

Valid options:
* p10
* p50
* p75
* p85
* p90
* p95
* p99
* p99\_9 | `string` | `"p50"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message for latency monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` 
otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | @@ -79,6 +85,7 @@ No modules. | [root\_disk\_usage\_no\_data\_window](#input\_root\_disk\_usage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [root\_disk\_usage\_threshold\_critical](#input\_root\_disk\_usage\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [root\_disk\_usage\_threshold\_warning](#input\_root\_disk\_usage\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [root\_disk\_usage\_use\_message](#input\_root\_disk\_usage\_use\_message) | Whether to use the query alert base message for root disk usage monitor | `bool` | `false` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/ec2/README.md b/aws/ec2/README.md index de24d42..7679e19 100644 --- a/aws/ec2/README.md +++ b/aws/ec2/README.md @@ -49,9 +49,12 @@ No modules. 
| [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | @@ -60,15 +63,19 @@ No 
modules. | [status\_failed\_check\_enabled](#input\_status\_failed\_check\_enabled) | Enable ec2 instance status check monitor | `bool` | `true` | no | | [status\_failed\_check\_evaluation\_window](#input\_status\_failed\_check\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_check\_no\_data\_window](#input\_status\_failed\_check\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [status\_failed\_check\_use\_message](#input\_status\_failed\_check\_use\_message) | Whether to use the query alert base message for ec2 instance status check monitor | `bool` | `false` | no | | [status\_failed\_instance\_enabled](#input\_status\_failed\_instance\_enabled) | Enable instance status check monitor | `bool` | `true` | no | | [status\_failed\_instance\_evaluation\_window](#input\_status\_failed\_instance\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_instance\_no\_data\_window](#input\_status\_failed\_instance\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [status\_failed\_instance\_use\_message](#input\_status\_failed\_instance\_use\_message) | Whether to use the query alert base message for instance status check monitor | `bool` | `false` | no | | [status\_failed\_system\_enabled](#input\_status\_failed\_system\_enabled) | Enable instance system failure monitor | `bool` | `true` | no | | [status\_failed\_system\_evaluation\_window](#input\_status\_failed\_system\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_system\_no\_data\_window](#input\_status\_failed\_system\_no\_data\_window) | No data threshold (in 
minutes, 0 to disable) | `number` | `10` | no | +| [status\_failed\_system\_use\_message](#input\_status\_failed\_system\_use\_message) | Whether to use the query alert base message for instance system failure monitor | `bool` | `false` | no | | [status\_failed\_volume\_enabled](#input\_status\_failed\_volume\_enabled) | Enable attached volume status monitor | `bool` | `true` | no | | [status\_failed\_volume\_evaluation\_window](#input\_status\_failed\_volume\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [status\_failed\_volume\_no\_data\_window](#input\_status\_failed\_volume\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [status\_failed\_volume\_use\_message](#input\_status\_failed\_volume\_use\_message) | Whether to use the query alert base message for attached volume status monitor | `bool` | `false` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | diff --git a/aws/ecs-cluster/README.md b/aws/ecs-cluster/README.md index 99e2faf..cdbab68 100644 --- a/aws/ecs-cluster/README.md +++ b/aws/ecs-cluster/README.md @@ -44,6 +44,7 @@ No modules. 
| [agent\_status\_no\_data\_window](#input\_agent\_status\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [agent\_status\_threshold\_critical](#input\_agent\_status\_threshold\_critical) | Critical threshold | `number` | `5` | no | | [agent\_status\_threshold\_warning](#input\_agent\_status\_threshold\_warning) | Warning threshold | `number` | `3` | no | +| [agent\_status\_use\_message](#input\_agent\_status\_use\_message) | Whether to use the query alert base message for agent status monitor | `bool` | `false` | no | | [alert\_critical\_priority](#input\_alert\_critical\_priority) | Priority for alerts within critical threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [alert\_message](#input\_alert\_message) | Message to prepend to alert notifications | `string` | `"Alert"` | no | | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | @@ -59,11 +60,13 @@ No modules. 
| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | | [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable cluster CPU utilization monitor | `bool` | `false` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | 
[evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | @@ -72,13 +75,17 @@ No modules. | [memory\_reservation\_no\_data\_window](#input\_memory\_reservation\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [memory\_reservation\_threshold\_critical](#input\_memory\_reservation\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [memory\_reservation\_threshold\_warning](#input\_memory\_reservation\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [memory\_reservation\_use\_message](#input\_memory\_reservation\_use\_message) | Whether to use the query alert base message for memory reservation monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/ecs-fargate/README.md b/aws/ecs-fargate/README.md index eefe028..9977961 100644 --- a/aws/ecs-fargate/README.md 
+++ b/aws/ecs-fargate/README.md @@ -54,32 +54,39 @@ No modules. | [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | | [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable Fargate task CPU utilization monitor | `bool` | `false` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank 
to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | -| [fargate\_check\_enabled](#input\_fargate\_check\_enabled) | Enable Fargate check monitor | `bool` | `false` | no | +| [fargate\_check\_enabled](#input\_fargate\_check\_enabled) | Enable Fargate check monitor | `bool` | `true` | no | | [fargate\_check\_evaluation\_window](#input\_fargate\_check\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [fargate\_check\_group\_by](#input\_fargate\_check\_group\_by) | Tag to group alerts by (will result in multiple alerts being generated based on tag cardinality) | `string` | `"*"` | no | | [fargate\_check\_no\_data\_window](#input\_fargate\_check\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [fargate\_check\_threshold\_critical](#input\_fargate\_check\_threshold\_critical) | Critical threshold | `number` | `5` | no | | [fargate\_check\_threshold\_warning](#input\_fargate\_check\_threshold\_warning) | Warning threshold | `number` | `3` | no | +| [fargate\_check\_use\_message](#input\_fargate\_check\_use\_message) | Whether to use the query alert base message for Fargate check monitor | `bool` | `false` | no | | [memory\_utilization\_enabled](#input\_memory\_utilization\_enabled) | Enable Fargate task memory utilization monitor | `bool` | `false` | no | | [memory\_utilization\_evaluation\_window](#input\_memory\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | | [memory\_utilization\_no\_data\_window](#input\_memory\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | 
`number` | `10` | no | | [memory\_utilization\_threshold\_critical](#input\_memory\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [memory\_utilization\_threshold\_warning](#input\_memory\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [memory\_utilization\_use\_message](#input\_memory\_utilization\_use\_message) | Whether to use the query alert base message for memory utilization monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| 
[notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/ecs-service/README.md b/aws/ecs-service/README.md index daa61ab..c7db7ba 100644 --- a/aws/ecs-service/README.md +++ b/aws/ecs-service/README.md @@ -51,14 +51,16 @@ No modules. | [cpu\_utilization\_anomaly\_recovery\_window](#input\_cpu\_utilization\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | | [cpu\_utilization\_anomaly\_rollup](#input\_cpu\_utilization\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonaility) | `number` | `60` | no | | [cpu\_utilization\_anomaly\_seasonality](#input\_cpu\_utilization\_anomaly\_seasonality) | Seasonaility (hourly, daily, weekly) | `string` | `"weekly"` | no | -| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | +| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `0.75` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | 
[cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | -| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable Fargate task CPU utilization monitor | `bool` | `false` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable Fargate task CPU utilization monitor | `bool` | `true` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `string` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | 
@@ -67,22 +69,27 @@ No modules. | [memory\_utilization\_no\_data\_window](#input\_memory\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [memory\_utilization\_threshold\_critical](#input\_memory\_utilization\_threshold\_critical) | Critical threshold (percent) | `string` | `0.9` | no | | [memory\_utilization\_threshold\_warning](#input\_memory\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `0.8` | no | +| [memory\_utilization\_use\_message](#input\_memory\_utilization\_use\_message) | Whether to use the query alert base message for memory utilization monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| 
[notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | -| [running\_tasks\_enabled](#input\_running\_tasks\_enabled) | Enable running tasks monitor | `bool` | `false` | no | +| [running\_tasks\_enabled](#input\_running\_tasks\_enabled) | Enable running tasks monitor | `bool` | `true` | no | | [running\_tasks\_evaluation\_window](#input\_running\_tasks\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [running\_tasks\_no\_data\_window](#input\_running\_tasks\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [running\_tasks\_threshold\_critical](#input\_running\_tasks\_threshold\_critical) | Critical threshold (percentage) | `number` | `0.25` | no | +| [running\_tasks\_threshold\_critical](#input\_running\_tasks\_threshold\_critical) | Critical threshold (percentage) | `number` | `0.5` | no | | [running\_tasks\_threshold\_warning](#input\_running\_tasks\_threshold\_warning) | Warning threshold (percentage) 
| `number` | `null` | no | +| [running\_tasks\_use\_message](#input\_running\_tasks\_use\_message) | Whether to use the query alert base message for running tasks monitor | `bool` | `true` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | diff --git a/aws/elasticache/README.md b/aws/elasticache/README.md index 086b2e9..67890f6 100644 --- a/aws/elasticache/README.md +++ b/aws/elasticache/README.md @@ -62,11 +62,13 @@ No modules. | [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | | [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `false` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 
0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | @@ -75,29 +77,36 @@ No modules. | [evictions\_no\_data\_window](#input\_evictions\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [evictions\_threshold\_critical](#input\_evictions\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | | [evictions\_threshold\_warning](#input\_evictions\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [evictions\_use\_message](#input\_evictions\_use\_message) | Whether to use the query alert base message for evictions monitor | `bool` | `false` | no | | [hit\_rate\_anomaly\_deviations](#input\_hit\_rate\_anomaly\_deviations) | Standard deviations | `number` | `2` | no | | [hit\_rate\_anomaly\_enabled](#input\_hit\_rate\_anomaly\_enabled) | Enable cache hit rate anomaly monitor | `bool` | `false` | no | | [hit\_rate\_anomaly\_evaluation\_window](#input\_hit\_rate\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | 
`"last_1h"` | no | | [hit\_rate\_anomaly\_no\_data\_window](#input\_hit\_rate\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [hit\_rate\_anomaly\_seasonality](#input\_hit\_rate\_anomaly\_seasonality) | Seasonaility (hourly, daily, weekly) | `string` | `"daily"` | no | | [hit\_rate\_anomaly\_threshold\_critical](#input\_hit\_rate\_anomaly\_threshold\_critical) | Critical threshold (percentage) | `number` | `null` | no | +| [hit\_rate\_anomaly\_use\_message](#input\_hit\_rate\_anomaly\_use\_message) | Whether to use the query alert base message for hit rate anomaly monitor | `bool` | `false` | no | | [hit\_rate\_enabled](#input\_hit\_rate\_enabled) | Enable cache hit rate monitor | `bool` | `false` | no | | [hit\_rate\_evaluation\_window](#input\_hit\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [hit\_rate\_no\_data\_window](#input\_hit\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [hit\_rate\_threshold\_critical](#input\_hit\_rate\_threshold\_critical) | Critical threshold (percentage) | `number` | `null` | no | | [hit\_rate\_threshold\_warning](#input\_hit\_rate\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | +| [hit\_rate\_use\_message](#input\_hit\_rate\_use\_message) | Whether to use the query alert base message for hit rate monitor | `bool` | `false` | no | | [max\_connections\_enabled](#input\_max\_connections\_enabled) | Enable max connections monitor | `bool` | `false` | no | | [max\_connections\_evaluation\_window](#input\_max\_connections\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [max\_connections\_no\_data\_window](#input\_max\_connections\_no\_data\_window) | No data threshold (in 
minutes, 0 to disable) | `number` | `10` | no | | [max\_connections\_threshold\_critical](#input\_max\_connections\_threshold\_critical) | Critical threshold (connections) | `number` | `64000` | no | | [max\_connections\_threshold\_warning](#input\_max\_connections\_threshold\_warning) | Warning threshold (connections) | `number` | `60000` | no | +| [max\_connections\_use\_message](#input\_max\_connections\_use\_message) | Whether to use the query alert base message for max connections monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| 
[notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | @@ -108,6 +117,7 @@ No modules. | [swap\_usage\_no\_data\_window](#input\_swap\_usage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [swap\_usage\_threshold\_critical](#input\_swap\_usage\_threshold\_critical) | Critical threshold (bytes) | `number` | `52428800` | no | | [swap\_usage\_threshold\_warning](#input\_swap\_usage\_threshold\_warning) | Warning threshold (bytes) | `number` | `null` | no | +| [swap\_usage\_use\_message](#input\_swap\_usage\_use\_message) | Whether to use the query alert base message for swap usage monitor | `bool` | `false` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | diff --git a/aws/elasticsearch/README.md b/aws/elasticsearch/README.md index ddac429..20ad716 100644 --- a/aws/elasticsearch/README.md +++ b/aws/elasticsearch/README.md @@ -48,9 +48,11 @@ No modules. 
| [cluster\_health\_red\_enabled](#input\_cluster\_health\_red\_enabled) | Enable cluster health\_red monitor | `bool` | `true` | no | | [cluster\_health\_red\_evaluation\_window](#input\_cluster\_health\_red\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_red\_no\_data\_window](#input\_cluster\_health\_red\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cluster\_health\_red\_use\_message](#input\_cluster\_health\_red\_use\_message) | Whether to use the query alert base message for cluster health red monitor | `bool` | `true` | no | | [cluster\_health\_yellow\_enabled](#input\_cluster\_health\_yellow\_enabled) | Enable cluster health monitor | `bool` | `true` | no | | [cluster\_health\_yellow\_evaluation\_window](#input\_cluster\_health\_yellow\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cluster\_health\_yellow\_no\_data\_window](#input\_cluster\_health\_yellow\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | +| [cluster\_health\_yellow\_use\_message](#input\_cluster\_health\_yellow\_use\_message) | Whether to use the query alert base message for cluster health yellow monitor | `bool` | `false` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [cpu\_utilization\_anomaly\_deviations](#input\_cpu\_utilization\_anomaly\_deviations) | Standard deviations | `number` | `4` | no | | [cpu\_utilization\_anomaly\_enabled](#input\_cpu\_utilization\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | @@ -62,26 +64,32 @@ No modules. 
| [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | | [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `false` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `0.9` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `0.8` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | 
[evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [free\_storage\_enabled](#input\_free\_storage\_enabled) | Enable free storage monitor | `bool` | `true` | no | | [free\_storage\_evaluation\_window](#input\_free\_storage\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [free\_storage\_no\_data\_window](#input\_free\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | -| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold (%) | `number` | `90` | no | -| [free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold (%) | `number` | `80` | no | +| [free\_storage\_threshold\_critical](#input\_free\_storage\_threshold\_critical) | Critical threshold for used disk space (%) | `number` | `90` | no | +| [free\_storage\_threshold\_warning](#input\_free\_storage\_threshold\_warning) | Warning threshold for used disk space (%) | `number` | `80` | no | +| [free\_storage\_use\_message](#input\_free\_storage\_use\_message) | Whether to use the query alert base message for free storage monitor | `bool` | `true` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/elb/README.md b/aws/elb/README.md index 776a272..a0edca2 100644 --- a/aws/elb/README.md +++ b/aws/elb/README.md @@ 
-53,16 +53,19 @@ No modules. | [http\_5xx\_backend\_responses\_no\_data\_window](#input\_http\_5xx\_backend\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_backend\_responses\_threshold\_critical](#input\_http\_5xx\_backend\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_backend\_responses\_threshold\_warning](#input\_http\_5xx\_backend\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_backend\_responses\_use\_message](#input\_http\_5xx\_backend\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx backend responses monitor | `bool` | `false` | no | | [http\_5xx\_responses\_enabled](#input\_http\_5xx\_responses\_enabled) | Enable HTTP 5xx response monitor | `bool` | `false` | no | | [http\_5xx\_responses\_evaluation\_window](#input\_http\_5xx\_responses\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [http\_5xx\_responses\_no\_data\_window](#input\_http\_5xx\_responses\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [http\_5xx\_responses\_threshold\_critical](#input\_http\_5xx\_responses\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [http\_5xx\_responses\_threshold\_warning](#input\_http\_5xx\_responses\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [http\_5xx\_responses\_use\_message](#input\_http\_5xx\_responses\_use\_message) | Whether to use the query alert base message for HTTP 5xx responses monitor | `bool` | `false` | no | | [latency\_enabled](#input\_latency\_enabled) | Enable latency monitor | `bool` | `false` | no | | [latency\_evaluation\_window](#input\_latency\_evaluation\_window) | Evaluation 
window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [latency\_no\_data\_window](#input\_latency\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [latency\_threshold\_critical](#input\_latency\_threshold\_critical) | Critical threshold (seconds) | `number` | `null` | no | | [latency\_threshold\_warning](#input\_latency\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [latency\_use\_message](#input\_latency\_use\_message) | Whether to use the query alert base message for latency monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | @@ -71,10 +74,14 @@ No modules. 
| [no\_healthy\_instances\_no\_data\_window](#input\_no\_healthy\_instances\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [no\_healthy\_instances\_threshold\_critical](#input\_no\_healthy\_instances\_threshold\_critical) | Warning threshold (percentage) | `number` | `0` | no | | [no\_healthy\_instances\_threshold\_warning](#input\_no\_healthy\_instances\_threshold\_warning) | Warning threshold (percentage) | `number` | `null` | no | +| [no\_healthy\_instances\_use\_message](#input\_no\_healthy\_instances\_use\_message) | Whether to use the query alert base message for no healthy instances monitor | `bool` | `true` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) 
| `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | diff --git a/aws/lambda/README.md b/aws/lambda/README.md index a7403c1..77489b5 100644 --- a/aws/lambda/README.md +++ b/aws/lambda/README.md @@ -55,52 +55,62 @@ No modules. | [cold\_starts\_no\_data\_window](#input\_cold\_starts\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | | [cold\_starts\_threshold\_critical](#input\_cold\_starts\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | | [cold\_starts\_threshold\_warning](#input\_cold\_starts\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [cold\_starts\_use\_message](#input\_cold\_starts\_use\_message) | Whether to use the query alert base message for cold starts monitor | `bool` | `false` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | -| [error\_rate\_enabled](#input\_error\_rate\_enabled) | Enable Lambda error rate monitor | `bool` | `false` | no | +| [error\_rate\_enabled](#input\_error\_rate\_enabled) | Enable Lambda error rate monitor | `bool` | `true` | no | | [error\_rate\_evaluation\_window](#input\_error\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [error\_rate\_no\_data\_window](#input\_error\_rate\_no\_data\_window) | No data 
threshold (in minutes, 0 to disable) | `number` | `10` | no | | [error\_rate\_threshold\_critical](#input\_error\_rate\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [error\_rate\_threshold\_warning](#input\_error\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [error\_rate\_use\_message](#input\_error\_rate\_use\_message) | Whether to use the query alert base message for error rate monitor | `bool` | `true` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | | [iterator\_age\_enabled](#input\_iterator\_age\_enabled) | Enable iterator age monitor | `bool` | `false` | no | | [iterator\_age\_evaluation\_window](#input\_iterator\_age\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | | [iterator\_age\_forecast\_enabled](#input\_iterator\_age\_forecast\_enabled) | Enable iterator age monitor | `bool` | `false` | no | | [iterator\_age\_forecast\_evaluation\_window](#input\_iterator\_age\_forecast\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1d"` | no | | [iterator\_age\_forecast\_no\_data\_window](#input\_iterator\_age\_forecast\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | +| [iterator\_age\_forecast\_use\_message](#input\_iterator\_age\_forecast\_use\_message) | Whether to use the query alert base message for iterator age forecast monitor | `bool` | `false` | no | | [iterator\_age\_no\_data\_window](#input\_iterator\_age\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | | 
[iterator\_age\_threshold\_critical](#input\_iterator\_age\_threshold\_critical) | Critical threshold (milliseconds) | `number` | `86400000` | no | | [iterator\_age\_threshold\_warning](#input\_iterator\_age\_threshold\_warning) | Warning threshold (milliseconds) | `number` | `null` | no | +| [iterator\_age\_use\_message](#input\_iterator\_age\_use\_message) | Whether to use the query alert base message for iterator age monitor | `bool` | `false` | no | | [monitor\_exclude\_tags](#input\_monitor\_exclude\_tags) | Tags to be excluded in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications 
for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | -| [out\_of\_memory\_enabled](#input\_out\_of\_memory\_enabled) | Enable out of memory monitor (requires enhanced metrics) | `bool` | `false` | no | +| [out\_of\_memory\_enabled](#input\_out\_of\_memory\_enabled) | Enable out of memory monitor (requires enhanced metrics) | `bool` | `true` | no | | [out\_of\_memory\_evaluation\_window](#input\_out\_of\_memory\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_4h"` | no | | [out\_of\_memory\_no\_data\_window](#input\_out\_of\_memory\_no\_data\_window) | No data threshold (in minutes, null to disable) | `number` | `null` | no | -| [out\_of\_memory\_threshold\_critical](#input\_out\_of\_memory\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | +| [out\_of\_memory\_threshold\_critical](#input\_out\_of\_memory\_threshold\_critical) | Critical threshold (count) | `number` | `5` | no | | [out\_of\_memory\_threshold\_warning](#input\_out\_of\_memory\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [out\_of\_memory\_use\_message](#input\_out\_of\_memory\_use\_message) | Whether to use the query alert base message for out of memory monitor | `bool` | `false` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | 
[service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [team](#input\_team) | Team supporting the monitored resource (leave blank to omit tag) | `string` | `null` | no | -| [throttle\_rate\_enabled](#input\_throttle\_rate\_enabled) | Enable Lambda throttle rate monitor | `bool` | `false` | no | +| [throttle\_rate\_enabled](#input\_throttle\_rate\_enabled) | Enable Lambda throttle rate monitor | `bool` | `true` | no | | [throttle\_rate\_evaluation\_window](#input\_throttle\_rate\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [throttle\_rate\_no\_data\_window](#input\_throttle\_rate\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [throttle\_rate\_threshold\_critical](#input\_throttle\_rate\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `75` | no | | [throttle\_rate\_threshold\_warning](#input\_throttle\_rate\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `25` | no | +| [throttle\_rate\_use\_message](#input\_throttle\_rate\_use\_message) | Whether to use the query alert base message for throttle rate monitor | `bool` | `false` | no | | [timeout\_h](#input\_timeout\_h) | Auto-resolve alert in specified hours if condition no longer matches | `number` | `0` | no | -| [timeouts\_enabled](#input\_timeouts\_enabled) | Enable timeout count monitor | `bool` | `false` | no | +| [timeouts\_enabled](#input\_timeouts\_enabled) | Enable timeout count monitor | `bool` | `true` | no | | [timeouts\_evaluation\_window](#input\_timeouts\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [timeouts\_no\_data\_window](#input\_timeouts\_no\_data\_window) | No data threshold (in minutes, 0 to disable) 
| `number` | `10` | no | | [timeouts\_threshold\_critical](#input\_timeouts\_threshold\_critical) | Critical threshold (count) | `number` | `75` | no | | [timeouts\_threshold\_warning](#input\_timeouts\_threshold\_warning) | Warning threshold (count) | `number` | `25` | no | +| [timeouts\_use\_message](#input\_timeouts\_use\_message) | Whether to use the query alert base message for timeouts monitor | `bool` | `false` | no | | [title\_prefix](#input\_title\_prefix) | Prefix all alerts with specified value in brackets | `string` | `null` | no | | [title\_suffix](#input\_title\_suffix) | Suffix all alerts with specified value in parenthesis | `string` | `null` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | diff --git a/aws/rds/README.md b/aws/rds/README.md index 2f3e192..130995c 100644 --- a/aws/rds/README.md +++ b/aws/rds/README.md @@ -46,15 +46,16 @@ No modules. | [alert\_nodata\_priority](#input\_alert\_nodata\_priority) | Priority for alerts within warning threshold (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | | [base\_tags](#input\_base\_tags) | Base tags (key:value format) to add to this type of check (combined with `local.tags` and `var.additional_tags`, generally you should not change this) | `list(string)` |
[
"resource:rds"
]
| no | | [connection\_count\_anomaly\_deviations](#input\_connection\_count\_anomaly\_deviations) | Standard deviations | `number` | `3` | no | -| [connection\_count\_anomaly\_enabled](#input\_connection\_count\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | +| [connection\_count\_anomaly\_enabled](#input\_connection\_count\_anomaly\_enabled) | Enable connection count anomaly monitor | `bool` | `true` | no | | [connection\_count\_anomaly\_evaluation\_window](#input\_connection\_count\_anomaly\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | | [connection\_count\_anomaly\_no\_data\_window](#input\_connection\_count\_anomaly\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [connection\_count\_anomaly\_recovery\_window](#input\_connection\_count\_anomaly\_recovery\_window) | Recovery window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | | [connection\_count\_anomaly\_rollup](#input\_connection\_count\_anomaly\_rollup) | Rollup interval (must be sized based on evaluation window/span and seasonaility) | `number` | `60` | no | | [connection\_count\_anomaly\_seasonality](#input\_connection\_count\_anomaly\_seasonality) | Seasonaility (hourly, daily, weekly) | `string` | `"weekly"` | no | -| [connection\_count\_anomaly\_threshold\_critical](#input\_connection\_count\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | +| [connection\_count\_anomaly\_threshold\_critical](#input\_connection\_count\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `0.75` | no | | [connection\_count\_anomaly\_threshold\_warning](#input\_connection\_count\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | 
[connection\_count\_anomaly\_trigger\_window](#input\_connection\_count\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | +| [connection\_count\_anomaly\_use\_message](#input\_connection\_count\_anomaly\_use\_message) | Whether to use the query alert base message for connection count anomaly monitor | `bool` | `true` | no | | [cost\_center](#input\_cost\_center) | Cost Center of the monitored resource (leave blank to omit tag) | `string` | `null` | no | | [cpu\_utilization\_anomaly\_deviations](#input\_cpu\_utilization\_anomaly\_deviations) | Standard deviations | `number` | `4` | no | | [cpu\_utilization\_anomaly\_enabled](#input\_cpu\_utilization\_anomaly\_enabled) | Enable CPU utilization anomaly monitor | `bool` | `false` | no | @@ -66,11 +67,13 @@ No modules. | [cpu\_utilization\_anomaly\_threshold\_critical](#input\_cpu\_utilization\_anomaly\_threshold\_critical) | Critical threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_threshold\_warning](#input\_cpu\_utilization\_anomaly\_threshold\_warning) | Warning threshold (percent) | `number` | `null` | no | | [cpu\_utilization\_anomaly\_trigger\_window](#input\_cpu\_utilization\_anomaly\_trigger\_window) | Trigger window for anomaly monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_1h"` | no | -| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `false` | no | +| [cpu\_utilization\_anomaly\_use\_message](#input\_cpu\_utilization\_anomaly\_use\_message) | Whether to use the query alert base message for CPU utilization anomaly monitor | `bool` | `false` | no | +| [cpu\_utilization\_enabled](#input\_cpu\_utilization\_enabled) | Enable CPU utilization monitor | `bool` | `true` | no | | [cpu\_utilization\_evaluation\_window](#input\_cpu\_utilization\_evaluation\_window) | 
Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [cpu\_utilization\_no\_data\_window](#input\_cpu\_utilization\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [cpu\_utilization\_threshold\_critical](#input\_cpu\_utilization\_threshold\_critical) | Critical threshold (percent) | `number` | `90` | no | | [cpu\_utilization\_threshold\_warning](#input\_cpu\_utilization\_threshold\_warning) | Warning threshold (percent) | `number` | `80` | no | +| [cpu\_utilization\_use\_message](#input\_cpu\_utilization\_use\_message) | Whether to use the query alert base message for CPU utilization monitor | `bool` | `false` | no | | [dashboard\_link](#input\_dashboard\_link) | Dashboard link to include in message | `string` | `null` | no | | [env](#input\_env) | Environment the monitored resource is in (leave blank to omit tag) | `string` | `null` | no | | [evaluation\_delay](#input\_evaluation\_delay) | Monitor evaluation delay (see [https://docs.datadoghq.com/monitors/configuration/?tab=thresholdalert#set-alert-conditions](Datadog Docs)) | `number` | `900` | no | @@ -78,9 +81,12 @@ No modules. | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | @@ -95,6 +101,7 @@ No modules. 
| [used\_storage\_no\_data\_window](#input\_used\_storage\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [used\_storage\_threshold\_critical](#input\_used\_storage\_threshold\_critical) | Critical threshold (percentage, 0-100) | `number` | `90` | no | | [used\_storage\_threshold\_warning](#input\_used\_storage\_threshold\_warning) | Warning threshold (percentage, 0-100) | `number` | `80` | no | +| [used\_storage\_use\_message](#input\_used\_storage\_use\_message) | Whether to use the query alert base message for used storage monitor | `bool` | `true` | no | | [warn\_priority](#input\_warn\_priority) | Priority for alerts with no data (P1-P5, uses monitor defaults if not specified) | `string` | `null` | no | ## Outputs diff --git a/aws/sqs/README.md b/aws/sqs/README.md index abd566f..2d27fa4 100644 --- a/aws/sqs/README.md +++ b/aws/sqs/README.md @@ -48,9 +48,12 @@ No modules. | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. 
Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [oldest\_message\_enabled](#input\_oldest\_message\_enabled) | Enable oldest queued message monitor | `bool` | `false` | no | @@ -58,11 +61,13 @@ No modules. 
| [oldest\_message\_no\_data\_window](#input\_oldest\_message\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [oldest\_message\_threshold\_critical](#input\_oldest\_message\_threshold\_critical) | Critical threshold (seconds) | `number` | `75` | no | | [oldest\_message\_threshold\_warning](#input\_oldest\_message\_threshold\_warning) | Warning threshold (seconds) | `number` | `null` | no | +| [oldest\_message\_use\_message](#input\_oldest\_message\_use\_message) | Whether to use the query alert base message for oldest message monitor | `bool` | `false` | no | | [queue\_depth\_enabled](#input\_queue\_depth\_enabled) | Enable queue depth count monitor | `bool` | `false` | no | | [queue\_depth\_evaluation\_window](#input\_queue\_depth\_evaluation\_window) | Evaluation window for monitor (`last_?m` (1, 5, 10, 15, or 30), `last_?h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | | [queue\_depth\_no\_data\_window](#input\_queue\_depth\_no\_data\_window) | No data threshold (in minutes, 0 to disable) | `number` | `10` | no | | [queue\_depth\_threshold\_critical](#input\_queue\_depth\_threshold\_critical) | Critical threshold (count) | `number` | `null` | no | | [queue\_depth\_threshold\_warning](#input\_queue\_depth\_threshold\_warning) | Warning threshold (count) | `number` | `null` | no | +| [queue\_depth\_use\_message](#input\_queue\_depth\_use\_message) | Whether to use the query alert base message for queue depth monitor | `bool` | `false` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send notifications about an alert | `number` | `0` | no | | [runbook\_link](#input\_runbook\_link) | Runbook link to include in message | `string` | `null` | no | | [service](#input\_service) | Service associated with the monitored resource (leave blank to omit tag) | `string` | `null` | no | diff --git a/aws/vpn/README.md b/aws/vpn/README.md index 9b9bab1..662a44a 100644 --- 
a/aws/vpn/README.md +++ b/aws/vpn/README.md @@ -44,9 +44,12 @@ No modules. | [monitor\_include\_tags](#input\_monitor\_include\_tags) | Tags to be included in the monitoring query. Specify in key:value format | `list(string)` | `[]` | no | | [new\_group\_delay](#input\_new\_group\_delay) | Delay in seconds before generating alerts for a new resource | `number` | `300` | no | | [notify\_alert\_override](#input\_notify\_alert\_override) | List of notifications for alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_crit\_override](#input\_notify\_crit\_override) | List of notifications for 24x7 alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_default](#input\_notify\_default) | List of alert notifications (can be overridden based on alert type) | `list(string)` | n/a | yes | | [notify\_no\_data](#input\_notify\_no\_data) | Alert if no matching data is found | `bool` | `false` | no | | [notify\_nodata\_override](#input\_notify\_nodata\_override) | List of notifications for no data (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_nonprod\_override](#input\_notify\_nonprod\_override) | List of notifications for non-prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | +| [notify\_prod\_override](#input\_notify\_prod\_override) | List of notifications for 12x5 prod alerts in critical threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_recovery\_override](#input\_notify\_recovery\_override) | List of notifications for alert recovery (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [notify\_warn\_override](#input\_notify\_warn\_override) | List of notifications for alerts in warning threshold (uses `notify_default` otherwise) | `list(string)` | `[]` | no | | [renotify\_interval](#input\_renotify\_interval) | Interval in minutes to re-send 
notifications about an alert | `number` | `0` | no | From 2dcfd553e3c0dbd8502d7deab51d765968c8f717 Mon Sep 17 00:00:00 2001 From: Kevin Date: Thu, 17 Oct 2024 11:03:02 -0400 Subject: [PATCH 08/10] changing some defaults --- aws/alb/variables.tf | 6 +++--- aws/elasticsearch/variables.tf | 2 +- common/common.tf | 6 +----- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/aws/alb/variables.tf b/aws/alb/variables.tf index 0637625..dff2e8d 100644 --- a/aws/alb/variables.tf +++ b/aws/alb/variables.tf @@ -17,7 +17,7 @@ variable "base_tags" { # HTTP 5xx Response Codes (ALB) ######################################## variable "http_5xx_responses_enabled" { - default = false + default = true description = "Enable HTTP 5xx response monitor" type = bool } @@ -56,7 +56,7 @@ variable "http_5xx_responses_use_message" { # HTTP 5xx Response Codes (Target Group) ######################################## variable "http_5xx_tg_responses_enabled" { - default = false + default = true description = "Enable HTTP 5xx response monitor (target group)" type = bool } @@ -95,7 +95,7 @@ variable "http_5xx_tg_responses_use_message" { # Latency Instances ######################################## variable "latency_enabled" { - default = false + default = true description = "Enable latency monitor" type = bool } diff --git a/aws/elasticsearch/variables.tf b/aws/elasticsearch/variables.tf index 9cc6ab5..d251705 100644 --- a/aws/elasticsearch/variables.tf +++ b/aws/elasticsearch/variables.tf @@ -71,7 +71,7 @@ variable "cluster_health_yellow_use_message" { # Node CPU Utilization ######################################## variable "cpu_utilization_enabled" { - default = false + default = true description = "Enable CPU utilization monitor" type = bool } diff --git a/common/common.tf b/common/common.tf index 6d90a63..878a37c 100644 --- a/common/common.tf +++ b/common/common.tf @@ -286,9 +286,6 @@ END query_alert_base_message = < Date: Mon, 21 Oct 2024 12:50:14 -0400 Subject: [PATCH 09/10] ALB 
latency default --- aws/alb/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/alb/variables.tf b/aws/alb/variables.tf index dff2e8d..e9e4912 100644 --- a/aws/alb/variables.tf +++ b/aws/alb/variables.tf @@ -113,7 +113,7 @@ variable "latency_no_data_window" { } variable "latency_threshold_critical" { - default = null + default = 3 description = "Critical threshold (seconds)" type = number } From 370d8c5ae5176d45d71c85026e7e6dc215b93f6f Mon Sep 17 00:00:00 2001 From: Kevin Date: Mon, 21 Oct 2024 15:08:07 -0400 Subject: [PATCH 10/10] errant space --- aws/alb/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/alb/main.tf b/aws/alb/main.tf index 2681e66..e5449ca 100644 --- a/aws/alb/main.tf +++ b/aws/alb/main.tf @@ -72,7 +72,7 @@ END resource "datadog_monitor" "latency" { count = var.latency_enabled ? 1 : 0 - name = join("", [local.title_prefix, "ALB latency - {{loadbalancer.name}} {{value}}s ", local.title_suffix]) + name = join("", [local.title_prefix, "ALB latency - {{loadbalancer.name}} {{value}}s", local.title_suffix]) include_tags = false message = var.latency_use_message ? local.query_alert_base_message : "" tags = concat(local.common_tags, var.base_tags, var.additional_tags)