From f6b54970f556da452eac526500c1c352c99be17b Mon Sep 17 00:00:00 2001
From: Richard Klose <richard.klose@wiit.cloud>
Date: Fri, 21 Feb 2025 15:29:06 +0100
Subject: [PATCH 1/2] feat: add support for _health_report

In elasticsearch 8.7 a new endpoint for cluster health has been added. See https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-health-report

Signed-off-by: Richard Klose <richard.klose@wiit.cloud>
---
 README.md                        |  26 ++
 collector/health_report.go       | 472 +++++++++++++++++++++++++++++++
 collector/health_report_test.go  | 169 +++++++++++
 fixtures/healthreport/8.7.0.json | 111 ++++++++
 4 files changed, 778 insertions(+)
 create mode 100644 collector/health_report.go
 create mode 100644 collector/health_report_test.go
 create mode 100644 fixtures/healthreport/8.7.0.json

diff --git a/README.md b/README.md
index 3ae31ed8..9cc6f173 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,7 @@ elasticsearch_exporter --help
 | es.ilm                  | 1.6.0                 | If true, query index lifecycle policies for indices in the cluster.
 | es.shards               | 1.0.3rc1              | If true, query stats for all indices in the cluster, including shard-level stats (implies `es.indices=true`).                                                                                                                                                                                                                                                                         | false |
 | collector.snapshots     | 1.0.4rc1              | If true, query stats for the cluster snapshots. (As of v1.7.0, this flag has replaced "es.snapshots").                                                                                                                                                                                                                                                                                | false |
+| collector.health-report | 1.9.0                 | If true, query the health report (requires elasticsearch 8.7.0 or later)                                                                                                                                                                                                                                                                                                              | false |
 | es.slm                  |                       | If true, query stats for SLM.                                                                                                                                                                                                                                                                                                                                                         | false |
 | es.data_stream          |                       | If true, query state for Data Steams.                                                                                                                                                                                                                                                                                                                                                 | false |
 | es.timeout              | 1.0.2                 | Timeout for trying to get stats from Elasticsearch. (ex: 20s)                                                                                                                                                                                                                                                                                                                         | 5s |
@@ -270,6 +271,31 @@ Further Information
 | elasticsearch_data_stream_stats_json_parse_failures                  | counter    | 0           | Number of parsing failures for Data Stream stats                                                    |
 | elasticsearch_data_stream_backing_indices_total                      | gauge      | 1           | Number of backing indices for Data Stream                                                           |
 | elasticsearch_data_stream_store_size_bytes                           | gauge      | 1           | Current size of data stream backing indices in bytes                                                |
+| elasticsearch_health_report_creating_primaries                       | gauge      | 1           | The number of creating primary shards                                                               |
+| elasticsearch_health_report_creating_replicas                        | gauge      | 1           | The number of creating replica shards                                                               |
+| elasticsearch_health_report_data_stream_lifecycle_status             | gauge      | 3           | Data stream lifecycle status                                                                        |
+| elasticsearch_health_report_disk_status                              | gauge      | 3           | Disk status                                                                                         |
+| elasticsearch_health_report_ilm_policies                             | gauge      | 1           | The number of ILM Policies                                                                          |
+| elasticsearch_health_report_ilm_stagnating_indices                   | gauge      | 1           | The number of stagnating indices                                                                    |
+| elasticsearch_health_report_ilm_status                               | gauge      | 3           | ILM status                                                                                          |
+| elasticsearch_health_report_initializing_primaries                   | gauge      | 1           | The number of initializing primary shards                                                           |
+| elasticsearch_health_report_initializing_replicas                    | gauge      | 1           | The number of initializing replica shards                                                           |
+| elasticsearch_health_report_master_is_stable_status                  | gauge      | 3           | Master is stable status                                                                             |
+| elasticsearch_health_report_max_shards_in_cluster_data               | gauge      | 1           | The number of maximum shards in a cluster                                                           |
+| elasticsearch_health_report_max_shards_in_cluster_frozen             | gauge      | 1           | The number of maximum frozen shards in a cluster                                                    |
+| elasticsearch_health_report_repository_integrity_status              | gauge      | 3           | Repository integrity status                                                                         |
+| elasticsearch_health_report_restarting_primaries                     | gauge      | 1           | The number of restarting primary shards                                                             |
+| elasticsearch_health_report_restarting_replicas                      | gauge      | 1           | The number of restarting replica shards                                                             |
+| elasticsearch_health_report_shards_availabilty_status                | gauge      | 3           | Shards availabilty status                                                                           |
+| elasticsearch_health_report_shards_capacity_status                   | gauge      | 3           | Shards capacity status                                                                              |
+| elasticsearch_health_report_slm_policies                             | gauge      | 1           | The number of SLM policies                                                                          |
+| elasticsearch_health_report_slm_status                               | gauge      | 3           | SLM status                                                                                          |
+| elasticsearch_health_report_started_primaries                        | gauge      | 1           | The number of started primary shards                                                                |
+| elasticsearch_health_report_started_replicas                         | gauge      | 1           | The number of started replica shards                                                                |
+| elasticsearch_health_report_status                                   | gauge      | 3           | Overall cluster status                                                                              |
+| elasticsearch_health_report_total_repositories                       | gauge      | 1           | The number of snapshot repositories                                                                 |
+| elasticsearch_health_report_unassigned_primaries                     | gauge      | 1           | The number of unassigned primary shards                                                             |
+| elasticsearch_health_report_unassigned_replicas                      | gauge      | 1           | The number of unassigned replica shards                                                             |
 
 ### Alerts & Recording Rules
 
diff --git a/collector/health_report.go b/collector/health_report.go
new file mode 100644
index 00000000..4933d98c
--- /dev/null
+++ b/collector/health_report.go
@@ -0,0 +1,472 @@
+// Copyright 2025 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package collector
+
+import (
+	"context"
+	"encoding/json"
+	"log/slog"
+	"net/http"
+	"net/url"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+	statusColors              = []string{"green", "yellow", "red"} // values of the "color" label; Update emits one series per entry
+	defaultHealthReportLabels = []string{"cluster"}                // label set of the count gauges (no "color" label)
+)
+
+var ( // Metric descriptors sent by (*HealthReport).Update; all share the "health_report" subsystem.
+	healthReportTotalRepositories = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "total_repositories"),
+		"The number of snapshot repositories",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportMaxShardsInClusterData = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "max_shards_in_cluster_data"),
+		"The number of maximum shards in a cluster",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportMaxShardsInClusterFrozen = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "max_shards_in_cluster_frozen"),
+		"The number of maximum frozen shards in a cluster",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportRestartingReplicas = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "restarting_replicas"),
+		"The number of restarting replica shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportCreatingPrimaries = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "creating_primaries"),
+		"The number of creating primary shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportInitializingReplicas = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "initializing_replicas"),
+		"The number of initializing replica shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportUnassignedReplicas = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "unassigned_replicas"),
+		"The number of unassigned replica shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportStartedPrimaries = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "started_primaries"),
+		"The number of started primary shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportRestartingPrimaries = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "restarting_primaries"),
+		"The number of restarting primary shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportInitializingPrimaries = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "initializing_primaries"),
+		"The number of initializing primary shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportCreatingReplicas = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "creating_replicas"),
+		"The number of creating replica shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportStartedReplicas = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "started_replicas"),
+		"The number of started replica shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportUnassignedPrimaries = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "unassigned_primaries"),
+		"The number of unassigned primary shards",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportSlmPolicies = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "slm_policies"),
+		"The number of SLM policies",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportIlmPolicies = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "ilm_policies"),
+		"The number of ILM Policies",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportIlmStagnatingIndices = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "ilm_stagnating_indices"),
+		"The number of stagnating indices",
+		defaultHealthReportLabels, nil,
+	)
+	healthReportStatus = prometheus.NewDesc( // from here on: status descriptors, labelled by "cluster" and "color"
+		prometheus.BuildFQName(namespace, "health_report", "status"),
+		"Overall cluster status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportMasterIsStableStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "master_is_stable_status"),
+		"Master is stable status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportRepositoryIntegrityStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "repository_integrity_status"),
+		"Repository integrity status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportDiskStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "disk_status"),
+		"Disk status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportShardsCapacityStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "shards_capacity_status"),
+		"Shards capacity status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportShardsAvailabiltystatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "shards_availabilty_status"),
+		"Shards availabilty status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportDataStreamLifecycleStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "data_stream_lifecycle_status"),
+		"Data stream lifecycle status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportSlmStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "slm_status"),
+		"SLM status",
+		[]string{"cluster", "color"}, nil,
+	)
+	healthReportIlmStatus = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "health_report", "ilm_status"),
+		"ILM status",
+		[]string{"cluster", "color"}, nil,
+	)
+)
+
+func init() { // init registers the health report collector at package load time.
+	registerCollector("health-report", defaultDisabled, NewHealthReport) // registered with defaultDisabled; README lists the flag as collector.health-report (default false)
+}
+
+type HealthReport struct { // HealthReport is the collector for the Elasticsearch /_health_report endpoint.
+	logger *slog.Logger // passed to getURL in Update
+	client *http.Client // HTTP client passed to getURL in Update
+	url    *url.URL     // base URL that "/_health_report" is resolved against
+}
+
+func NewHealthReport(logger *slog.Logger, url *url.URL, client *http.Client) (Collector, error) { // NewHealthReport stores its arguments in a new HealthReport; the returned error is always nil.
+	return &HealthReport{
+		logger: logger,
+		client: client,
+		url:    url,
+	}, nil
+}
+
+type HealthReportResponse struct { // HealthReportResponse is the top-level JSON document Update decodes from /_health_report.
+	ClusterName string                 `json:"cluster_name"` // used as the "cluster" label value on every metric
+	Status      string                 `json:"status"`       // overall status; compared against each entry of statusColors
+	Indicators  HealthReportIndicators `json:"indicators"`   // per-indicator sections
+}
+
+type HealthReportIndicators struct { // HealthReportIndicators holds one section per health indicator under the "indicators" key.
+	MasterIsStable      HealthReportMasterIsStable      `json:"master_is_stable"`
+	RepositoryIntegrity HealthReportRepositoryIntegrity `json:"repository_integrity"`
+	Disk                HealthReportDisk                `json:"disk"`
+	ShardsCapacity      HealthReportShardsCapacity      `json:"shards_capacity"`
+	ShardsAvailability  HealthReportShardsAvailability  `json:"shards_availability"`
+	DataStreamLifecycle HealthReportDataStreamLifecycle `json:"data_stream_lifecycle"`
+	Slm                 HealthReportSlm                 `json:"slm"`
+	Ilm                 HealthReportIlm                 `json:"ilm"`
+}
+
+type HealthReportMasterIsStable struct { // HealthReportMasterIsStable is the "master_is_stable" indicator; Update reads only Status.
+	Status  string                            `json:"status"`
+	Symptom string                            `json:"symptom"`
+	Details HealthReportMasterIsStableDetails `json:"details"`
+}
+
+type HealthReportMasterIsStableDetails struct { // HealthReportMasterIsStableDetails is decoded but not read by Update.
+	CurrentMaster HealthReportMasterIsStableDetailsNode   `json:"current_master"`
+	RecentMasters []HealthReportMasterIsStableDetailsNode `json:"recent_masters"`
+}
+
+type HealthReportMasterIsStableDetailsNode struct { // HealthReportMasterIsStableDetailsNode is the node entry used for current_master and recent_masters.
+	NodeID string `json:"node_id"`
+	Name   string `json:"name"`
+}
+
+type HealthReportRepositoryIntegrity struct { // HealthReportRepositoryIntegrity is the "repository_integrity" indicator; Update reads Status and Details.TotalRepositories.
+	Status  string                                  `json:"status"`
+	Symptom string                                  `json:"symptom"`
+	Details HealthReportRepositoriyIntegrityDetails `json:"details"`
+}
+
+type HealthReportRepositoriyIntegrityDetails struct { // NOTE(review): "Repositoriy" in the type name is a misspelling; renaming would change an exported identifier.
+	TotalRepositories int `json:"total_repositories"` // exported as the total_repositories gauge
+}
+
+type HealthReportDisk struct { // HealthReportDisk is the "disk" indicator; Update reads only Status.
+	Status  string                  `json:"status"`
+	Symptom string                  `json:"symptom"`
+	Details HealthReportDiskDetails `json:"details"`
+}
+
+type HealthReportDiskDetails struct { // HealthReportDiskDetails is decoded but none of its counters are read by Update.
+	IndicesWithReadonlyBlock     int `json:"indices_with_readonly_block"`
+	NodesWithEnoughDiskSpace     int `json:"nodes_with_enough_disk_space"`
+	NodesWithUnknownDiskStatus   int `json:"nodes_with_unknown_disk_status"`
+	NodesOverHighWatermark       int `json:"nodes_over_high_watermark"`
+	NodesOverFloodStageWatermark int `json:"nodes_over_flood_stage_watermark"`
+}
+
+type HealthReportShardsCapacity struct { // HealthReportShardsCapacity is the "shards_capacity" indicator; Update reads Status and both Details limits.
+	Status  string                            `json:"status"`
+	Symptom string                            `json:"symptom"`
+	Details HealthReportShardsCapacityDetails `json:"details"`
+}
+
+type HealthReportShardsCapacityDetails struct { // HealthReportShardsCapacityDetails carries the shard limits under the "data" and "frozen" keys.
+	Data   HealthReportShardsCapacityDetailsMaxShards `json:"data"`   // exported as max_shards_in_cluster_data
+	Frozen HealthReportShardsCapacityDetailsMaxShards `json:"frozen"` // exported as max_shards_in_cluster_frozen
+}
+
+type HealthReportShardsCapacityDetailsMaxShards struct { // HealthReportShardsCapacityDetailsMaxShards wraps a single max_shards_in_cluster value.
+	MaxShardsInCluster int `json:"max_shards_in_cluster"`
+}
+
+type HealthReportShardsAvailability struct { // HealthReportShardsAvailability is the "shards_availability" indicator; Update reads Status and every Details counter.
+	Status  string                                `json:"status"`
+	Symptom string                                `json:"symptom"`
+	Details HealthReportShardsAvailabilityDetails `json:"details"`
+}
+
+type HealthReportShardsAvailabilityDetails struct { // HealthReportShardsAvailabilityDetails holds the shard counters; Update exports each field as its own gauge.
+	RestartingReplicas    int `json:"restarting_replicas"`
+	CreatingPrimaries     int `json:"creating_primaries"`
+	InitializingReplicas  int `json:"initializing_replicas"`
+	UnassignedReplicas    int `json:"unassigned_replicas"`
+	StartedPrimaries      int `json:"started_primaries"`
+	RestartingPrimaries   int `json:"restarting_primaries"`
+	InitializingPrimaries int `json:"initializing_primaries"`
+	CreatingReplicas      int `json:"creating_replicas"`
+	StartedReplicas       int `json:"started_replicas"`
+	UnassignedPrimaries   int `json:"unassigned_primaries"`
+}
+
+type HealthReportDataStreamLifecycle struct { // HealthReportDataStreamLifecycle is the "data_stream_lifecycle" indicator; no details are decoded and Update reads only Status.
+	Status  string `json:"status"`
+	Symptom string `json:"symptom"`
+}
+
+type HealthReportSlm struct { // HealthReportSlm is the "slm" indicator; Update reads Status and Details.Policies.
+	Status  string                 `json:"status"`
+	Symptom string                 `json:"symptom"`
+	Details HealthReportSlmDetails `json:"details"`
+}
+
+type HealthReportSlmDetails struct { // HealthReportSlmDetails is the "details" object of the slm indicator.
+	SlmStatus string `json:"slm_status"` // decoded but not read by Update
+	Policies  int    `json:"policies"`   // exported as the slm_policies gauge
+}
+
+type HealthReportIlm struct { // HealthReportIlm is the "ilm" indicator; Update reads Status, Details.Policies and Details.StagnatingIndices.
+	Status  string                 `json:"status"`
+	Symptom string                 `json:"symptom"`
+	Details HealthReportIlmDetails `json:"details"`
+}
+
+type HealthReportIlmDetails struct { // HealthReportIlmDetails is the "details" object of the ilm indicator.
+	Policies          int    `json:"policies"`           // exported as the ilm_policies gauge
+	StagnatingIndices int    `json:"stagnating_indices"` // exported as the ilm_stagnating_indices gauge
+	IlmStatus         string `json:"ilm_status"`         // decoded but not read by Update
+}
+
+func statusValue(value string, color string) float64 { // statusValue returns 1 when value equals color exactly (case-sensitive), otherwise 0.
+	if value == color {
+		return 1
+	}
+	return 0
+}
+
+func (c *HealthReport) Update(ctx context.Context, ch chan<- prometheus.Metric) error { // Update fetches /_health_report, decodes it and sends one const gauge per descriptor to ch; fetch or decode errors are returned before anything is sent.
+	u := c.url.ResolveReference(&url.URL{Path: "/_health_report"}) // NOTE(review): absolute reference path, so presumably any path prefix on c.url is dropped; confirm for proxied setups
+	var healthReportResponse HealthReportResponse
+
+	resp, err := getURL(ctx, c.client, c.logger, u.String())
+	if err != nil {
+		return err
+	}
+
+	err = json.Unmarshal(resp, &healthReportResponse) // keys absent from the JSON leave zero values, which are then exported as 0
+	if err != nil {
+		return err
+	}
+
+	ch <- prometheus.MustNewConstMetric(
+		healthReportTotalRepositories,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.RepositoryIntegrity.Details.TotalRepositories),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportMaxShardsInClusterData,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsCapacity.Details.Data.MaxShardsInCluster),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportMaxShardsInClusterFrozen,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsCapacity.Details.Frozen.MaxShardsInCluster),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportRestartingReplicas,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.RestartingReplicas),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportCreatingPrimaries,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.CreatingPrimaries),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportInitializingReplicas,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.InitializingReplicas),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportUnassignedReplicas,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.UnassignedReplicas),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportStartedPrimaries,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.StartedPrimaries),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportRestartingPrimaries,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.RestartingPrimaries),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportInitializingPrimaries,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.InitializingPrimaries),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportCreatingReplicas,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.CreatingReplicas),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportStartedReplicas,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.StartedReplicas),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportUnassignedPrimaries,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.ShardsAvailability.Details.UnassignedPrimaries),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportSlmPolicies,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.Slm.Details.Policies),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportIlmPolicies,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.Ilm.Details.Policies),
+		healthReportResponse.ClusterName,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		healthReportIlmStagnatingIndices,
+		prometheus.GaugeValue,
+		float64(healthReportResponse.Indicators.Ilm.Details.StagnatingIndices),
+		healthReportResponse.ClusterName,
+	)
+
+	for _, color := range statusColors { // one series per color: 1 for the reported status, 0 otherwise; a status outside statusColors yields 0 for all three
+		ch <- prometheus.MustNewConstMetric(
+			healthReportStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportMasterIsStableStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.MasterIsStable.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportRepositoryIntegrityStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.RepositoryIntegrity.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportDiskStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.Disk.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportShardsCapacityStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.ShardsCapacity.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportShardsAvailabiltystatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.ShardsAvailability.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportDataStreamLifecycleStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.DataStreamLifecycle.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportSlmStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.Slm.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			healthReportIlmStatus,
+			prometheus.GaugeValue,
+			statusValue(healthReportResponse.Indicators.Ilm.Status, color),
+			healthReportResponse.ClusterName, color,
+		)
+	}
+
+	return nil
+}
diff --git a/collector/health_report_test.go b/collector/health_report_test.go
new file mode 100644
index 00000000..012afbfd
--- /dev/null
+++ b/collector/health_report_test.go
@@ -0,0 +1,169 @@
+// Copyright 2025 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package collector
+
+import (
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/prometheus/common/promslog"
+)
+
+// TestHealthReport replays recorded _health_report responses through a stub HTTP server and compares the collected metrics.
+func TestHealthReport(t *testing.T) {
+	// Fixtures were captured with:
+	//  docker run -d -p 9200:9200 elasticsearch:VERSION
+	//  curl -XPUT http://localhost:9200/twitter
+	//  curl http://localhost:9200/_health_report
+	tests := []struct {
+		name, file, want string
+	}{
+		{
+			name: "8.7.0",
+			file: "../fixtures/healthreport/8.7.0.json",
+			want: `
+				# HELP elasticsearch_health_report_creating_primaries The number of creating primary shards
+				# TYPE elasticsearch_health_report_creating_primaries gauge
+				elasticsearch_health_report_creating_primaries{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_creating_replicas The number of creating replica shards
+				# TYPE elasticsearch_health_report_creating_replicas gauge
+				elasticsearch_health_report_creating_replicas{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_data_stream_lifecycle_status Data stream lifecycle status
+				# TYPE elasticsearch_health_report_data_stream_lifecycle_status gauge
+				elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_disk_status Disk status
+				# TYPE elasticsearch_health_report_disk_status gauge
+				elasticsearch_health_report_disk_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_disk_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_disk_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_ilm_policies The number of ILM Policies
+				# TYPE elasticsearch_health_report_ilm_policies gauge
+				elasticsearch_health_report_ilm_policies{cluster="docker-cluster"} 17
+				# HELP elasticsearch_health_report_ilm_stagnating_indices The number of stagnating indices
+				# TYPE elasticsearch_health_report_ilm_stagnating_indices gauge
+				elasticsearch_health_report_ilm_stagnating_indices{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_ilm_status ILM status
+				# TYPE elasticsearch_health_report_ilm_status gauge
+				elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_initializing_primaries The number of initializing primary shards
+				# TYPE elasticsearch_health_report_initializing_primaries gauge
+				elasticsearch_health_report_initializing_primaries{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_initializing_replicas The number of initializing replica shards
+				# TYPE elasticsearch_health_report_initializing_replicas gauge
+				elasticsearch_health_report_initializing_replicas{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_master_is_stable_status Master is stable status
+				# TYPE elasticsearch_health_report_master_is_stable_status gauge
+				elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_max_shards_in_cluster_data The number of maximum shards in a cluster
+				# TYPE elasticsearch_health_report_max_shards_in_cluster_data gauge
+				elasticsearch_health_report_max_shards_in_cluster_data{cluster="docker-cluster"} 13500
+				# HELP elasticsearch_health_report_max_shards_in_cluster_frozen The number of maximum frozen shards in a cluster
+				# TYPE elasticsearch_health_report_max_shards_in_cluster_frozen gauge
+				elasticsearch_health_report_max_shards_in_cluster_frozen{cluster="docker-cluster"} 9000
+				# HELP elasticsearch_health_report_repository_integrity_status Repository integrity status
+				# TYPE elasticsearch_health_report_repository_integrity_status gauge
+				elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_restarting_primaries The number of restarting primary shards
+				# TYPE elasticsearch_health_report_restarting_primaries gauge
+				elasticsearch_health_report_restarting_primaries{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_restarting_replicas The number of restarting replica shards
+				# TYPE elasticsearch_health_report_restarting_replicas gauge
+				elasticsearch_health_report_restarting_replicas{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_shards_availabilty_status Shards availabilty status
+				# TYPE elasticsearch_health_report_shards_availabilty_status gauge
+				elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_shards_capacity_status Shards capacity status
+				# TYPE elasticsearch_health_report_shards_capacity_status gauge
+				elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_slm_policies The number of SLM policies
+				# TYPE elasticsearch_health_report_slm_policies gauge
+				elasticsearch_health_report_slm_policies{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_slm_status SLM status
+				# TYPE elasticsearch_health_report_slm_status gauge
+				elasticsearch_health_report_slm_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_slm_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_slm_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_started_primaries The number of started primary shards
+				# TYPE elasticsearch_health_report_started_primaries gauge
+				elasticsearch_health_report_started_primaries{cluster="docker-cluster"} 11703
+				# HELP elasticsearch_health_report_started_replicas The number of started replica shards
+				# TYPE elasticsearch_health_report_started_replicas gauge
+				elasticsearch_health_report_started_replicas{cluster="docker-cluster"} 1701
+				# HELP elasticsearch_health_report_status Overall cluster status
+				# TYPE elasticsearch_health_report_status gauge
+				elasticsearch_health_report_status{cluster="docker-cluster",color="green"} 1
+				elasticsearch_health_report_status{cluster="docker-cluster",color="red"} 0
+				elasticsearch_health_report_status{cluster="docker-cluster",color="yellow"} 0
+				# HELP elasticsearch_health_report_total_repositories The number of snapshot repositories
+				# TYPE elasticsearch_health_report_total_repositories gauge
+				elasticsearch_health_report_total_repositories{cluster="docker-cluster"} 1
+				# HELP elasticsearch_health_report_unassigned_primaries The number of unassigned primary shards
+				# TYPE elasticsearch_health_report_unassigned_primaries gauge
+				elasticsearch_health_report_unassigned_primaries{cluster="docker-cluster"} 0
+				# HELP elasticsearch_health_report_unassigned_replicas The number of unassigned replica shards
+				# TYPE elasticsearch_health_report_unassigned_replicas gauge
+				elasticsearch_health_report_unassigned_replicas{cluster="docker-cluster"} 0
+      `,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			f, err := os.Open(tt.file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer f.Close()
+			// Any request path is answered with the fixture; a failed copy must fail the test, not pass silently.
+			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				if _, err := io.Copy(w, f); err != nil {
+					t.Errorf("serving fixture %s: %v", tt.file, err)
+				}
+			}))
+			defer ts.Close()
+
+			u, err := url.Parse(ts.URL)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			c, err := NewHealthReport(promslog.NewNopLogger(), u, http.DefaultClient)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
diff --git a/fixtures/healthreport/8.7.0.json b/fixtures/healthreport/8.7.0.json
new file mode 100644
index 00000000..337142d5
--- /dev/null
+++ b/fixtures/healthreport/8.7.0.json
@@ -0,0 +1,111 @@
+{
+  "status": "green",
+  "cluster_name": "docker-cluster",
+  "indicators": {
+    "master_is_stable": {
+      "status": "green",
+      "symptom": "The cluster has a stable master node",
+      "details": {
+        "current_master": {
+          "node_id": "X8BAj1mfQ3qgcSoAlG3HHw",
+          "name": "5da1610e99a7"
+        },
+        "recent_masters": [
+          {
+            "node_id": "X8BAj1mfQ3qgcSoAlG3HHw",
+            "name": "5da1610e99a7"
+          }
+        ]
+      }
+    },
+    "repository_integrity": {
+      "status": "green",
+      "symptom": "All repositories are healthy.",
+      "details": {
+        "total_repositories": 1
+      }
+    },
+    "shards_capacity": {
+      "status": "green",
+      "symptom": "The cluster has enough room to add new shards.",
+      "details": {
+        "data": {
+          "max_shards_in_cluster": 13500
+        },
+        "frozen": {
+          "max_shards_in_cluster": 9000
+        }
+      }
+    },
+    "shards_availability": {
+      "status": "green",
+      "symptom": "This cluster has all shards available.",
+      "details": {
+        "restarting_replicas": 0,
+        "creating_primaries": 0,
+        "initializing_replicas": 0,
+        "unassigned_replicas": 0,
+        "started_primaries": 11703,
+        "restarting_primaries": 0,
+        "initializing_primaries": 0,
+        "creating_replicas": 0,
+        "started_replicas": 1701,
+        "unassigned_primaries": 0
+      },
+      "impacts": [
+        {
+          "id": "elasticsearch:health:shards_availability:impact:replica_unassigned",
+          "severity": 2,
+          "description": "Searches might be slower than usual. Fewer redundant copies of the data exist on 1 index [twitter].",
+          "impact_areas": [
+            "search"
+          ]
+        }
+      ],
+      "diagnosis": [
+        {
+          "id": "elasticsearch:health:shards_availability:diagnosis:increase_tier_capacity_for_allocations:tier:data_content",
+          "cause": "Elasticsearch isn't allowed to allocate some shards from these indices to any of the nodes in the desired data tier because there are not enough nodes in the [data_content] tier to allocate each shard copy on a different node.",
+          "action": "Increase the number of nodes in this tier or decrease the number of replica shards in the affected indices.",
+          "help_url": "https://ela.st/tier-capacity",
+          "affected_resources": {
+            "indices": [
+              "twitter"
+            ]
+          }
+        }
+      ]
+    },
+    "disk": {
+      "status": "green",
+      "symptom": "The cluster has enough available disk space.",
+      "details": {
+        "indices_with_readonly_block": 0,
+        "nodes_with_enough_disk_space": 1,
+        "nodes_with_unknown_disk_status": 0,
+        "nodes_over_high_watermark": 0,
+        "nodes_over_flood_stage_watermark": 0
+      }
+    },
+    "data_stream_lifecycle": {
+      "status": "green",
+      "symptom": "No data stream lifecycle health data available yet. Health information will be reported after the first run."
+    },
+    "ilm": {
+      "status": "green",
+      "symptom": "Index Lifecycle Management is running",
+      "details": {
+        "policies": 17,
+        "ilm_status": "RUNNING"
+      }
+    },
+    "slm": {
+      "status": "green",
+      "symptom": "No Snapshot Lifecycle Management policies configured",
+      "details": {
+        "slm_status": "RUNNING",
+        "policies": 0
+      }
+    }
+  }
+}

From 47231fb107b30646f0dc86c24e6bc63deae0a64a Mon Sep 17 00:00:00 2001
From: Richard Klose <richard@klose.dev>
Date: Fri, 21 Mar 2025 11:17:28 +0100
Subject: [PATCH 2/2] docs: update cardinality in README and release version

Co-authored-by: Joe Adams <github@joeadams.io>
Signed-off-by: Richard Klose <richard@klose.dev>
---
 README.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 9cc6f173..382c25f2 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ elasticsearch_exporter --help
 | es.ilm                  | 1.6.0                 | If true, query index lifecycle policies for indices in the cluster.
 | es.shards               | 1.0.3rc1              | If true, query stats for all indices in the cluster, including shard-level stats (implies `es.indices=true`).                                                                                                                                                                                                                                                                         | false |
 | collector.snapshots     | 1.0.4rc1              | If true, query stats for the cluster snapshots. (As of v1.7.0, this flag has replaced "es.snapshots").                                                                                                                                                                                                                                                                                | false |
-| collector.health-report | 1.9.0                 | If true, query the health report (requires elasticsearch 8.7.0 or later)                                                                                                                                                                                                                                                                                                              | false |
+| collector.health-report | 1.10.0                 | If true, query the health report (requires elasticsearch 8.7.0 or later)                                                                                                                                                                                                                                                                                                              | false |
 | es.slm                  |                       | If true, query stats for SLM.                                                                                                                                                                                                                                                                                                                                                         | false |
 | es.data_stream          |                       | If true, query state for Data Steams.                                                                                                                                                                                                                                                                                                                                                 | false |
 | es.timeout              | 1.0.2                 | Timeout for trying to get stats from Elasticsearch. (ex: 20s)                                                                                                                                                                                                                                                                                                                         | 5s |
@@ -273,26 +273,26 @@ Further Information
 | elasticsearch_data_stream_store_size_bytes                           | gauge      | 1           | Current size of data stream backing indices in bytes                                                |
 | elasticsearch_health_report_creating_primaries                       | gauge      | 1           | The number of creating primary shards                                                               |
 | elasticsearch_health_report_creating_replicas                        | gauge      | 1           | The number of creating replica shards                                                               |
-| elasticsearch_health_report_data_stream_lifecycle_status             | gauge      | 1           | Data stream lifecycle status                                                                        |
-| elasticsearch_health_report_disk_status                              | gauge      | 3           | disk status                                                                                         |
+| elasticsearch_health_report_data_stream_lifecycle_status             | gauge      | 2           | Data stream lifecycle status                                                                        |
+| elasticsearch_health_report_disk_status                              | gauge      | 2           | Disk status                                                                                         |
 | elasticsearch_health_report_ilm_policies                             | gauge      | 1           | The number of ILM Policies                                                                          |
 | elasticsearch_health_report_ilm_stagnating_indices                   | gauge      | 1           | The number of stagnating indices                                                                    |
-| elasticsearch_health_report_ilm_status                               | gauge      | 3           | ILM status                                                                                          |
+| elasticsearch_health_report_ilm_status                               | gauge      | 2           | ILM status                                                                                          |
 | elasticsearch_health_report_initializing_primaries                   | gauge      | 1           | The number of initializing primary shards                                                           |
 | elasticsearch_health_report_initializing_replicas                    | gauge      | 1           | The number of initializing replica shards                                                           |
-| elasticsearch_health_report_master_is_stable_status                  | gauge      | 3           | Master is stable status                                                                             |
+| elasticsearch_health_report_master_is_stable_status                  | gauge      | 2           | Master is stable status                                                                             |
 | elasticsearch_health_report_max_shards_in_cluster_data               | gauge      | 1           | The number of maximum shards in a cluster                                                           |
 | elasticsearch_health_report_max_shards_in_cluster_frozen             | gauge      | 1           | The number of maximum frozen shards in a cluster                                                    |
-| elasticsearch_health_report_repository_integrity_status              | gauge      | 3           | Repository integrity status                                                                         |
+| elasticsearch_health_report_repository_integrity_status              | gauge      | 2           | Repository integrity status                                                                         |
 | elasticsearch_health_report_restarting_primaries                     | gauge      | 1           | The number of restarting primary shards                                                             |
 | elasticsearch_health_report_restarting_replicas                      | gauge      | 1           | The number of restarting replica shards                                                             |
-| elasticsearch_health_report_shards_availabilty_status                | gauge      | 3           | Shards availabilty status                                                                           |
-| elasticsearch_health_report_shards_capacity_status                   | gauge      | 3           | Shards capacity status                                                                              |
+| elasticsearch_health_report_shards_availabilty_status                | gauge      | 2           | Shards availabilty status                                                                           |
+| elasticsearch_health_report_shards_capacity_status                   | gauge      | 2           | Shards capacity status                                                                              |
 | elasticsearch_health_report_slm_policies                             | gauge      | 1           | The number of SLM policies                                                                          |
-| elasticsearch_health_report_slm_status                               | gauge      | 3           | SLM status                                                                                          |
+| elasticsearch_health_report_slm_status                               | gauge      | 2           | SLM status                                                                                          |
 | elasticsearch_health_report_started_primaries                        | gauge      | 1           | The number of started primary shards                                                                |
 | elasticsearch_health_report_started_replicas                         | gauge      | 1           | The number of started replica shards                                                                |
-| elasticsearch_health_report_status                                   | gauge      | 3           | Overall cluster status                                                                              |
+| elasticsearch_health_report_status                                   | gauge      | 2           | Overall cluster status                                                                              |
 | elasticsearch_health_report_total_repositories                       | gauge      | 1           | The number snapshot repositories                                                                    |
 | elasticsearch_health_report_unassigned_primaries                     | gauge      | 1           | The number of unassigned primary shards                                                             |
 | elasticsearch_health_report_unassigned_replicas                      | gauge      | 1           | The number of unassigned replica shards                                                             |