Skip to content

Commit c5838cd

Browse files
authored
[MongoDB]: Add alerting rule templates (#15866)
1 parent a6de7f8 commit c5838cd

9 files changed

+197
-3
lines changed

packages/mongodb/changelog.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
- version: "1.23.0"
2+
changes:
3+
- description: Add alerting rule templates.
4+
type: enhancement
5+
link: https://github.com/elastic/integrations/pull/15866
16
- version: "1.22.0"
27
changes:
38
- description: Allow @custom pipeline access to event.original without setting preserve_original_event.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-cache-usage-high",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] WiredTiger cache pressure",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when WiredTiger cache utilization exceeds 85% over the configured time window.\n// Aggregates per instance (service.address) using averaged cache stats.\nFROM metrics-mongodb.status-*\n| STATS cache_used=AVG(mongodb.status.wired_tiger.cache.used.bytes),\n cache_max=AVG(mongodb.status.wired_tiger.cache.maximum.bytes) BY service.address\n| WHERE cache_max > 0\n| EVAL cache_usage_pct = (cache_used / cache_max) * 100\n| WHERE cache_usage_pct > 85"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-connection-usage-high",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] High connection usage",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when current connections exceed 80% of total available capacity.\n// Aggregates average current/available connections per service.address.\nFROM metrics-mongodb.status-*\n| STATS current_conn=AVG(mongodb.status.connections.current),\n available_conn=AVG(mongodb.status.connections.available) BY service.address\n| EVAL total_conn = current_conn + available_conn\n| WHERE total_conn > 0\n| EVAL connection_usage_pct = (current_conn / total_conn) * 100\n| WHERE connection_usage_pct > 80"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-oplog-headroom-critical",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] Oplog headroom critically low",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when oplog headroom drops below 15 minutes, risking replication stalls.\n// Guards against negative values and groups by replica set name.\nFROM metrics-mongodb.replstatus-*\n| STATS oplog_headroom_min=MIN(mongodb.replstatus.headroom.min) BY mongodb.replstatus.set_name\n| WHERE oplog_headroom_min IS NOT NULL\n| EVAL oplog_headroom_min = CASE(oplog_headroom_min >= 0, oplog_headroom_min, 0)\n| EVAL oplog_headroom_minutes = TO_DOUBLE(oplog_headroom_min) / 60.0\n| WHERE oplog_headroom_minutes < 15"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-replica-member-down",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] Replica member down",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when replica sets report members in the down state within the configured time window.\n// Groups by replica set name to isolate environments.\nFROM metrics-mongodb.replstatus-*\n| STATS members_down=MAX(mongodb.replstatus.members.down.count) BY mongodb.replstatus.set_name\n| WHERE members_down > 0"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-replication-lag-high",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] High replication lag",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when maximum replication lag exceeds 10 seconds for any replica set.\n// Aggregates per replica set name.\nFROM metrics-mongodb.replstatus-*\n| STATS replication_lag=MAX(mongodb.replstatus.lag.max) BY mongodb.replstatus.set_name\n| WHERE replication_lag > 10"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"id": "mongodb-unhealthy-replica-members",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[MongoDB] Unhealthy replica members",
6+
"tags": [
7+
"MongoDB"
8+
],
9+
"ruleTypeId": ".es-query",
10+
"schedule": {
11+
"interval": "1m"
12+
},
13+
"params": {
14+
"searchType": "esqlQuery",
15+
"timeWindowSize": 5,
16+
"timeWindowUnit": "m",
17+
"esqlQuery": {
18+
"esql": "// Alert when replica sets report unhealthy members during the window.\n// Groups by replica set name for clarity.\nFROM metrics-mongodb.replstatus-*\n| STATS unhealthy_members=MAX(mongodb.replstatus.members.unhealthy.count) BY mongodb.replstatus.set_name\n| WHERE unhealthy_members > 0"
19+
},
20+
"groupBy": "row",
21+
"termSize": 5,
22+
"timeField": "@timestamp"
23+
},
24+
"alertDelay": {
25+
"active": 1
26+
}
27+
},
28+
"managed": true,
29+
"coreMigrationVersion": "8.8.0",
30+
"typeMigrationVersion": "10.1.0"
31+
}

packages/mongodb/manifest.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: mongodb
22
title: MongoDB
3-
version: "1.22.0"
3+
version: "1.23.0"
44
description: Collect logs and metrics from MongoDB instances with Elastic Agent.
55
type: integration
66
categories:
@@ -11,10 +11,10 @@ icons:
1111
title: logo mongodb
1212
size: 32x32
1313
type: image/svg+xml
14-
format_version: "3.0.2"
14+
format_version: "3.4.0"
1515
conditions:
1616
kibana:
17-
version: "^8.13.0 || ^9.0.0"
17+
version: "^8.19.0 || ^9.2.1"
1818
elastic:
1919
subscription: basic
2020
screenshots:

packages/mongodb/validation.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
errors:
2+
exclude_checks:
3+
- JSE00001

0 commit comments

Comments
 (0)