Skip to content

Commit 6f58205

Browse files
authored
Merge pull request #2143 from kube-logging/fix/wrong-metric-names
fix: syslogng metric names in prom rules
2 parents 2fa5997 + 6320f55 commit 6f58205

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

pkg/resources/syslogng/prometheusrules.go

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
5353
},
5454
{
5555
Alert: "SyslogNGQueueLength",
56-
Expr: intstr.FromString(fmt.Sprintf("rate(syslog_ng_status_buffer_queue_length{%s}[5m]) > 0.3", nsJobLabel)),
56+
Expr: intstr.FromString(fmt.Sprintf("max(syslogng_memory_queue_events{%s}) / max(syslogng_memory_queue_capacity{%s}) > 0.3", nsJobLabel, nsJobLabel)),
5757
For: prometheus_operator.Duration("1m"),
5858
Labels: map[string]string{
5959
"rulegroup": ruleGroupName,
@@ -62,12 +62,12 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
6262
},
6363
Annotations: map[string]string{
6464
"summary": `syslog-ng node are failing`,
65-
"description": `In the last 5 minutes, syslog-ng queues increased 30%. Current value is "{{ $value }}".`,
65+
"description": `Syslog-ng queue usage is above 30%. Current value is "{{ $value }}".`,
6666
},
6767
},
6868
{
6969
Alert: "SyslogNGQueueLength",
70-
Expr: intstr.FromString(fmt.Sprintf("rate(syslog_ng_status_buffer_queue_length{%s}[5m]) > 0.5", nsJobLabel)),
70+
Expr: intstr.FromString(fmt.Sprintf("max(syslogng_memory_queue_events{%s}) / max(syslogng_memory_queue_capacity{%s}) > 0.5", nsJobLabel, nsJobLabel)),
7171
For: prometheus_operator.Duration("1m"),
7272
Labels: map[string]string{
7373
"rulegroup": ruleGroupName,
@@ -76,12 +76,12 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
7676
},
7777
Annotations: map[string]string{
7878
"summary": `Syslog-NG nodes buffer queue length are critical`,
79-
"description": `In the last 5 minutes, Syslog-NG queues increased 50%. Current value is "{{ $value }}".`,
79+
"description": `Syslog-ng queue usage is above 50%. Current value is "{{ $value }}".`,
8080
},
8181
},
8282
{
8383
Alert: "SyslogNGRecordsCountsHigh",
84-
Expr: intstr.FromString(fmt.Sprintf("sum(rate(syslog_ng_output_status_emit_records{%[1]s}[5m])) by (job,pod,namespace) > (3 * sum(rate(syslog_ng_output_status_emit_records{%[1]s}[15m])) by (job,pod,namespace))", nsJobLabel)),
84+
Expr: intstr.FromString(fmt.Sprintf("sum(rate(syslogng_output_events_total{%[1]s}[5m])) by (job,pod,namespace) > (3 * sum(rate(syslogng_output_events_total{%[1]s}[15m])) by (job,pod,namespace))", nsJobLabel)),
8585
For: prometheus_operator.Duration("1m"),
8686
Labels: map[string]string{
8787
"rulegroup": ruleGroupName,
@@ -95,7 +95,7 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
9595
},
9696
{
9797
Alert: "SyslogNGRetry",
98-
Expr: intstr.FromString(fmt.Sprintf("increase(syslog_ng_status_retry_count{%s}[10m]) > 0", nsJobLabel)),
98+
Expr: intstr.FromString(fmt.Sprintf("max(syslogng_output_event_retries_total{%s}) > 0", nsJobLabel)),
9999
For: prometheus_operator.Duration("20m"),
100100
Labels: map[string]string{
101101
"rulegroup": ruleGroupName,
@@ -109,7 +109,7 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
109109
},
110110
{
111111
Alert: "SyslogNGOutputError",
112-
Expr: intstr.FromString(fmt.Sprintf("increase(syslog_ng_output_status_num_errors{%s}[10m]) > 0", nsJobLabel)),
112+
Expr: intstr.FromString(fmt.Sprintf("increase(syslogng_output_events_total{%s,result=\"dropped\"}[10m]) > 0", nsJobLabel)),
113113
For: prometheus_operator.Duration("1s"),
114114
Labels: map[string]string{
115115
"rulegroup": ruleGroupName,
@@ -123,7 +123,7 @@ func (r *Reconciler) prometheusRules() (runtime.Object, reconciler.DesiredState,
123123
},
124124
{
125125
Alert: "SyslogNGPredictedBufferGrowth",
126-
Expr: intstr.FromString(fmt.Sprintf("predict_linear(syslog_ng_output_status_buffer_total_bytes{%[1]s}[10m], 600) > syslog_ng_output_status_buffer_total_bytes{%[1]s}", nsJobLabel)),
126+
Expr: intstr.FromString(fmt.Sprintf("predict_linear(syslogng_memory_queue_memory_usage_bytes{%[1]s}[10m], 600) > syslogng_memory_queue_memory_usage_bytes{%[1]s}", nsJobLabel)),
127127
For: prometheus_operator.Duration("10m"),
128128
Labels: map[string]string{
129129
"rulegroup": ruleGroupName,

0 commit comments

Comments
 (0)