Skip to content

Commit 44172f6

Browse files
authored
Merge pull request #1 from peterparser/kubernetes-monitoring
Kubernetes monitoring
2 parents 85ecb8f + 683572b commit 44172f6

File tree

6 files changed

+161
-47
lines changed

6 files changed

+161
-47
lines changed

.vscode/launch.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python: Current File",
9+
"type": "python",
10+
"request": "launch",
11+
"program": "${file}",
12+
"console": "integratedTerminal",
13+
"justMyCode": true,
14+
"args": ["-u", "https://prometheus-k8s-openshift-monitoring.apps.elclown.lab.local", "-n", "sso"]
15+
}
16+
]
17+
}

README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
This tool simplifies the gathering of data about the resource consumption of your pods.
44
In particular, it outputs an Excel table with the current configuration of your Deployments and StatefulSets and their resources; the tool also estimates the requests and the limits based on Prometheus data.
55

6-
# DISCLAIMER
7-
At the moment the tool has been tested only against Openshift Clusters, a more agnostic version is coming soon!
8-
96
# Requirements
107
To run the script you need:
118
* An authenticated kubectl session

main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,16 @@
3636
epilog='RRS')
3737

3838
parser.add_argument("-u", "--url", help="Prometheus URL", type=str, required=True)
39+
parser.add_argument("-t", "--type", choices=['kubernetes', 'openshift'], default="kubernetes", help="Kubernetes flavour", type=str)
3940
parser.add_argument("-n", "--namespaces", help="Comma separated list of namespaces", type=str, required=True)
4041
parser.add_argument("-o", "--output", default="output.xlsx", help="Output file", type=str)
42+
4143
args = parser.parse_args()
4244
token = getpass.getpass("Insert SA Token here:")
4345

4446
config.load_kube_config()
4547
retriever = Retriever()
46-
suggester = Suggester(args.url, token)
48+
suggester = Suggester(args.url, token, args.type)
4749
namespaces = args.namespaces.split(",")
4850

4951
data = []

querier.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import requests
2+
3+
from urllib3.exceptions import InsecureRequestWarning
4+
5+
from statistics import mean
6+
7+
# PromQL templates. They are rendered with
# str.format(workload=..., namespace=..., time_range=...), so the doubled
# braces come out as the literal braces of the label selector.
_MEMORY_AVG = "avg_over_time(container_memory_working_set_bytes{{pod=~'{workload}.*',namespace='{namespace}',container=''}}[{time_range}])"
_MEMORY_MAX = "max_over_time(container_memory_working_set_bytes{{pod=~'{workload}.*',namespace='{namespace}',container=''}}[{time_range}])"
_CPU_RATE_AVG = "avg(rate(container_cpu_usage_seconds_total{{pod=~'{workload}.*',namespace='{namespace}', container=''}}[1m]))"
_OPENSHIFT_CPU_AVG = "avg_over_time(pod:container_cpu_usage:sum{{pod=~'{workload}.*',namespace='{namespace}'}}[{time_range}])"
_OPENSHIFT_CPU_MAX = "max_over_time(pod:container_cpu_usage:sum{{pod=~'{workload}.*',namespace='{namespace}'}}[{time_range}])"


def _instant(query):
    """Describe *query* as an entry served by the ``query`` endpoint."""
    return {"query": query, "query_type": "query"}


def _ranged(query):
    """Describe *query* as an entry served by the ``query_range`` endpoint."""
    return {"query": query, "query_type": "query_range"}


# Query set per cluster flavour, keyed by the metric each query feeds.
queries = {
    "kubernetes": {
        "memory_request": _instant(_MEMORY_AVG),
        "memory_limit": _instant(_MEMORY_MAX),
        # Both CPU metrics sample the same range query; they differ only in
        # the aggregator applied to the returned samples.
        "cpu_request": _ranged(_CPU_RATE_AVG),
        "cpu_limit": _ranged(_CPU_RATE_AVG),
    },
    "openshift": {
        "memory_request": _instant(_MEMORY_AVG),
        "memory_limit": _instant(_MEMORY_MAX),
        "cpu_request": _instant(_OPENSHIFT_CPU_AVG),
        "cpu_limit": _instant(_OPENSHIFT_CPU_MAX),
    },
}

# Reducers that collapse range-query samples into one number, per metric.
aggregators = dict(cpu_limit=max, cpu_request=mean)
53+
54+
def handle_memory(result):
    """Format the memory sample of a Prometheus instant-query result.

    Args:
        result: The ``data.result`` list returned by Prometheus. Only the
            first series is read; its ``value`` is a ``[timestamp, "bytes"]``
            pair.

    Returns:
        The sample divided by 1,000,000, rounded to an integer and suffixed
        with " M", or "No Data" when the result holds no series.
    """
    # A workload without matching series yields an empty list; report that
    # instead of letting an IndexError abort the whole run.
    if not result:
        return "No Data"

    sampled_bytes = float(result[0]["value"][1])
    return f'{round(sampled_bytes / 1000000)} M'
56+
57+
def handle_cpu(result, aggregator):
    """Format the CPU usage found in a Prometheus query result.

    Range-query series carry a ``values`` list of ``[timestamp, "value"]``
    pairs, which is collapsed with *aggregator*. Instant-query series carry
    a single ``value`` pair. Only the first series of *result* is read.

    Args:
        result: The ``data.result`` list returned by Prometheus.
        aggregator: Callable reducing a list of floats to one number. It is
            only called when the series holds range samples.

    Returns:
        The usage rounded to two decimals, as a string, or "No Data" when
        the result holds no sample.
    """
    if not result:
        return "No Data"

    series = result[0]

    # Instant-query series have no "values" key at all, so it has to be
    # looked up with .get(): indexing it raised KeyError before the
    # instant-query branch could ever run.
    samples = series.get("values")
    if samples:
        usage = aggregator([float(sample[1]) for sample in samples])
        return f'{round(usage, 2)}'

    sample = series.get("value")
    if not sample:
        return "No Data"
    return f'{round(float(sample[1]), 2)}'
64+
65+
66+
def handle_response(metric, result, aggregator=None):
    """Format a Prometheus result with the handler matching *metric*.

    Memory metrics are passed to ``handle_memory`` and CPU metrics to
    ``handle_cpu``, which also receives *aggregator*. Any other metric name
    yields ``None``.
    """
    if metric in ("memory_request", "memory_limit"):
        return handle_memory(result)

    if metric in ("cpu_request", "cpu_limit"):
        return handle_cpu(result, aggregator)

    return None
75+
76+
77+
def run_query(url, headers, params, timeout=30):
    """Send a query to the Prometheus HTTP API and return its result list.

    Args:
        url: Full endpoint URL to request.
        headers: HTTP headers to send with the request.
        params: Query-string parameters of the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``data.result`` value of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    # Verification is switched off below, so silence the warning urllib3
    # would otherwise print on every single request.
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

    # NOTE(security): verify=False accepts any TLS certificate, so the
    # bearer token can be exposed to a man-in-the-middle. Kept because
    # callers rely on it; consider making it configurable.
    response = requests.get(
        url,
        headers=headers,
        params=params,
        verify=False,
        timeout=timeout,  # without it a stalled server blocks forever
    )

    # Surface HTTP errors explicitly rather than as a KeyError on "data".
    response.raise_for_status()
    return response.json()["data"]["result"]

retriever.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ def __init__(self):
1010
self.retrieve_function = {
1111
"deployment": self.resources.list_namespaced_deployment,
1212
"statefulset": self.resources.list_namespaced_stateful_set,
13-
"daemonset": self.resources.list_namespaced_daemon_set
1413
}
1514

1615
def get_mem_cpu_req_lim(self, namespace):
@@ -34,9 +33,19 @@ def get_mem_cpu_req_lim(self, namespace):
3433
else:
3534
limit_cpu = "Not Defined"
3635
limit_memory = "Not Defined"
36+
37+
if object_type == 'deployment':
38+
for status in k8s_object.status.conditions:
39+
if status.type == 'Progressing':
40+
replicaset_name = status.message.split(" ")[1].replace('"', "")
41+
regex = f'{replicaset_name}.*'
42+
break
43+
else:
44+
regex = f'{name}-[0-9]+'
45+
print(regex)
3746
results.append((namespace, object_type, name,
3847
container.name, replicas,
3948
request_cpu, limit_cpu,
40-
request_memory, limit_memory))
49+
request_memory, limit_memory, regex))
4150

4251
return results

suggester.py

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,63 @@
1-
import requests
1+
import time
2+
23
from urllib3.exceptions import InsecureRequestWarning
34

5+
import querier
46

57
class Suggester:
6-
7-
def __init__(self, prometheus_host, token, time_range='7d'):
8+
def __init__(self, prometheus_host, token, k8s_flavour, time_range='7d', step='3h'):
    """Prepare the query set and the time window used for suggestions.

    Args:
        prometheus_host: Base URL of the Prometheus server.
        token: Bearer token sent with every Prometheus request.
        k8s_flavour: Key of ``querier.queries`` selecting the query set.
        time_range: Look-back window written as ``<integer><unit>``, with
            unit one of s, m, h, d, w or y (for example ``'7d'``). It is
            inserted into the PromQL templates and also fixes the start
            of range queries.
        step: Resolution step sent with range queries.

    Raises:
        KeyError: If *k8s_flavour* has no entry in ``querier.queries``.
        ValueError: If *time_range* is not ``<integer><unit>``.
    """
    self.prometheus_host = prometheus_host
    self.token = token
    self.time_range = time_range
    self.queries = querier.queries[k8s_flavour]
    self.end = int(time.time())

    # Honour the unit suffix: the window used to be computed as days
    # whatever the suffix was, so '12h' produced a 12-day window.
    seconds_per_unit = {
        "s": 1,
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60,
        "y": 365 * 24 * 60 * 60,
    }
    try:
        window = int(time_range[:-1]) * seconds_per_unit[time_range[-1]]
    except (IndexError, KeyError, ValueError) as exc:
        raise ValueError(
            f"Unsupported time range {time_range!r}: "
            "expected <integer><unit> with unit in s, m, h, d, w, y"
        ) from exc

    self.start = self.end - window
    self.step = step

    # Only range queries return several samples that need reducing.
    self.aggregator = {
        metric: querier.aggregators[metric]
        for metric, query_struct in self.queries.items()
        if query_struct["query_type"] == 'query_range'
    }
21+
22+
23+
def build_query(self, k8s_object, metric):
24+
host = f"{self.prometheus_host}/api/v1/{self.queries[metric]['query_type']}"
25+
match self.queries[metric]["query_type"]:
26+
case "query":
27+
params = {
28+
"query": self.queries[metric]["query"].format(
29+
workload=k8s_object[9],
30+
namespace=k8s_object[0],
31+
time_range=self.time_range
32+
)
33+
}
34+
case "query_range":
35+
params = {
36+
"query": self.queries[metric]["query"].format(
37+
workload=k8s_object[9],
38+
namespace=k8s_object[0],
39+
time_range=self.time_range
40+
),
41+
"start": self.start,
42+
"end": self.end,
43+
"step": self.step
44+
}
45+
46+
return host, params
47+
1148

1249
def suggest_values(self, k8s_object):
13-
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
50+
1451
headers = {
1552
'Authorization': f'Bearer {self.token}'
1653
}
17-
#Pe i pod avg_over_time(container_memory_working_set_bytes{pod=~'snitch-jvm.*',namespace='snitch',container='snitch-jvm'}[7d])
18-
# Retrieve pod memory usage
19-
params_memory_request = {
20-
"query": f"avg_over_time(container_memory_working_set_bytes{{pod=~'{k8s_object[2]}.*',namespace='{k8s_object[0]}',container=''}}[{self.time_range}])"
21-
}
22-
23-
response = requests.get(f"{self.prometheus_host}/api/v1/query", headers=headers, params=params_memory_request, verify=False)
24-
memoryRequest = f'{round(float(response.json()["data"]["result"][0]["value"][1]) / 1000000)} M'
25-
26-
params_memory_limit = {
27-
"query": f"max_over_time(container_memory_working_set_bytes{{pod=~'{k8s_object[2]}.*',namespace='{k8s_object[0]}',container=''}}[{self.time_range}])"
28-
}
29-
30-
response = requests.get(f"{self.prometheus_host}/api/v1/query", headers=headers, params=params_memory_limit,
31-
verify=False)
32-
33-
memoryLimit = f'{round(float(response.json()["data"]["result"][0]["value"][1]) / 1000000)} M'
34-
35-
params_cpu_request = {
36-
"query": f"avg_over_time(pod:container_cpu_usage:sum{{pod=~'{k8s_object[2]}.*',namespace='{k8s_object[0]}'}}[{self.time_range}])"
37-
}
38-
39-
response = requests.get(f"{self.prometheus_host}/api/v1/query", headers=headers, params=params_cpu_request,
40-
verify=False)
41-
42-
cpu_request = f'{round(float(response.json()["data"]["result"][0]["value"][1]), 3)}'
43-
44-
params_cpu_limit = {
45-
"query": f"max_over_time(pod:container_cpu_usage:sum{{pod=~'{k8s_object[2]}.*',namespace='{k8s_object[0]}'}}[{self.time_range}])"
46-
}
4754

48-
response = requests.get(f"{self.prometheus_host}/api/v1/query", headers=headers, params=params_cpu_limit,
49-
verify=False)
55+
results = {}
5056

51-
cpu_limit = f'{round(float(response.json()["data"]["result"][0]["value"][1]), 3)}'
57+
for metric, query_struct in self.queries.items():
58+
host, params = self.build_query(k8s_object, metric)
59+
result = querier.run_query(host, headers, params)
60+
results[metric] = querier.handle_response(metric, result, self.aggregator.get(metric, None))
5261

5362
return [
5463
k8s_object[0], # Namespace
@@ -57,13 +66,13 @@ def suggest_values(self, k8s_object):
5766
k8s_object[3], # Container
5867
k8s_object[4], # Replicas
5968
k8s_object[5], # Request CPU
60-
cpu_request, # suggested
69+
results["cpu_request"], # suggested
6170
k8s_object[6], # CPU LIMIT,
62-
cpu_limit,
71+
results["cpu_limit"],
6372
k8s_object[7], # Memory request
64-
memoryRequest, # suggest
73+
results["memory_request"], # suggest
6574
k8s_object[8], # memory limit
66-
memoryLimit # suggest
75+
results["memory_limit"] # suggest
6776
]
6877

6978

0 commit comments

Comments
 (0)