Skip to content

Commit 56f2b2f

Browse files
committed
fix: Prevent Pod 0 restart by utilizing mutating webhook
1 parent 9ae9ffc commit 56f2b2f

File tree

9 files changed

+2398
-113
lines changed

9 files changed

+2398
-113
lines changed

Cargo.lock

Lines changed: 458 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.nix

Lines changed: 1714 additions & 84 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ edition = "2021"
1010
repository = "https://github.com/stackabletech/commons-operator"
1111

1212
[workspace.dependencies]
13-
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", features = ["telemetry"], tag = "stackable-operator-0.100.1" }
13+
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", features = ["telemetry", "webhook"], tag = "stackable-operator-0.100.1" }
1414

1515
anyhow = "1.0"
1616
built = { version = "0.8", features = ["chrono", "git2"] }
@@ -24,5 +24,6 @@ strum = { version = "0.27", features = ["derive"] }
2424
tokio = { version = "1.40", features = ["full"] }
2525
tracing = "0.1"
2626

27-
# [patch."https://github.com/stackabletech/operator-rs.git"]
28-
# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" }
27+
[patch."https://github.com/stackabletech/operator-rs.git"]
28+
# stackable-operator = { path = "../operator-rs/crates/stackable-operator" }
29+
stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "feat/mutating-webhook" }

_TEST.yaml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
apiVersion: apps/v1
2+
kind: StatefulSet
3+
metadata:
4+
name: web
5+
labels:
6+
restarter.stackable.tech/enabled: "true"
7+
spec:
8+
selector:
9+
matchLabels:
10+
app: nginx
11+
serviceName: nginx
12+
replicas: 3
13+
template:
14+
metadata:
15+
labels:
16+
app: nginx
17+
spec:
18+
terminationGracePeriodSeconds: 10
19+
containers:
20+
- name: nginx
21+
image: registry.k8s.io/nginx-slim:0.24
22+
ports:
23+
- containerPort: 80
24+
name: web
25+
volumeMounts:
26+
- name: config
27+
mountPath: "/config"
28+
envFrom:
29+
- configMapRef:
30+
name: web-config-2
31+
volumes:
32+
- name: config
33+
configMap:
34+
name: web-config
35+
---
36+
apiVersion: v1
37+
kind: Service
38+
metadata:
39+
name: nginx
40+
labels:
41+
app: nginx
42+
spec:
43+
ports:
44+
- port: 80
45+
name: web
46+
clusterIP: None
47+
selector:
48+
app: nginx
49+
---
50+
apiVersion: v1
51+
kind: ConfigMap
52+
metadata:
53+
name: web-config
54+
data:
55+
foo: bar
56+
---
57+
apiVersion: v1
58+
kind: ConfigMap
59+
metadata:
60+
name: web-config-2
61+
data:
62+
foo: bar

crate-hashes.json

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/helm/commons-operator/templates/roles.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,10 @@ rules:
4646
- pods/eviction
4747
verbs:
4848
- create
49+
# Required to maintain MutatingWebhookConfigurations. The operator needs to do this, as it needs
50+
# to enter e.g. its generated certificate in the webhooks.
51+
- apiGroups: [admissionregistration.k8s.io]
52+
resources: [mutatingwebhookconfigurations]
53+
verbs:
54+
- create
55+
- patch

rust/operator-binary/src/main.rs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
// This will need changes in our and upstream error types.
33
#![allow(clippy::large_enum_variant)]
44

5-
mod restart_controller;
6-
5+
use anyhow::anyhow;
76
use clap::Parser;
8-
use futures::FutureExt;
7+
use futures::{FutureExt, TryFutureExt};
98
use stackable_operator::{
109
YamlSchema as _,
1110
cli::{Command, RunArguments},
@@ -17,11 +16,18 @@ use stackable_operator::{
1716
shared::yaml::SerializeOptions,
1817
telemetry::Tracing,
1918
};
19+
use webhooks::create_webhook;
20+
21+
mod restart_controller;
22+
mod webhooks;
2023

2124
mod built_info {
2225
include!(concat!(env!("OUT_DIR"), "/built.rs"));
2326
}
2427

28+
pub const OPERATOR_NAME: &str = "commons.stackable.tech";
29+
pub const FIELD_MANAGER: &str = "commons-operator";
30+
2531
#[derive(Parser)]
2632
#[clap(about, author)]
2733
struct Opts {
@@ -44,7 +50,7 @@ async fn main() -> anyhow::Result<()> {
4450
Command::Run(RunArguments {
4551
product_config: _,
4652
watch_namespace,
47-
operator_environment: _,
53+
operator_environment,
4854
maintenance,
4955
common,
5056
}) => {
@@ -76,12 +82,28 @@ async fn main() -> anyhow::Result<()> {
7682
)
7783
.await?;
7884

85+
let webhook = create_webhook(
86+
&operator_environment,
87+
// TODO: Make user configurable
88+
false,
89+
client.as_kube_client(),
90+
)
91+
.await?;
92+
let webhook = webhook
93+
.run()
94+
.map_err(|err| anyhow!(err).context("failed to run webhook"));
95+
7996
let sts_restart_controller =
8097
restart_controller::statefulset::start(&client, &watch_namespace).map(anyhow::Ok);
8198
let pod_restart_controller =
8299
restart_controller::pod::start(&client, &watch_namespace).map(anyhow::Ok);
83100

84-
futures::try_join!(sts_restart_controller, pod_restart_controller, eos_checker)?;
101+
futures::try_join!(
102+
sts_restart_controller,
103+
pod_restart_controller,
104+
eos_checker,
105+
webhook
106+
)?;
85107
}
86108
}
87109

rust/operator-binary/src/restart_controller/statefulset.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ use stackable_operator::{
1313
apps::v1::StatefulSet,
1414
core::v1::{ConfigMap, EnvFromSource, EnvVar, PodSpec, Secret, Volume},
1515
},
16-
kube,
1716
kube::{
18-
Resource, ResourceExt,
17+
self, Resource, ResourceExt,
1918
api::{PartialObjectMeta, Patch, PatchParams},
2019
core::{DeserializeGuard, DynamicObject, error_boundary},
2120
runtime::{
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use std::collections::BTreeMap;
2+
3+
use snafu::{ResultExt, Snafu};
4+
use stackable_operator::{
5+
builder::meta::ObjectMetaBuilder,
6+
cli::OperatorEnvironmentOptions,
7+
k8s_openapi::{
8+
api::{
9+
admissionregistration::v1::{
10+
MutatingWebhook, MutatingWebhookConfiguration, RuleWithOperations,
11+
WebhookClientConfig,
12+
},
13+
apps::v1::StatefulSet,
14+
},
15+
apimachinery::pkg::apis::meta::v1::LabelSelector,
16+
},
17+
kube::{
18+
Client,
19+
core::admission::{AdmissionRequest, AdmissionResponse},
20+
},
21+
kvp::Label,
22+
webhook::{WebhookError, WebhookOptions, WebhookServer, servers::MutatingWebhookServer},
23+
};
24+
25+
use crate::{FIELD_MANAGER, OPERATOR_NAME};
26+
27+
/// Errors that can occur while setting up the webhook server.
#[derive(Debug, Snafu)]
28+
pub enum Error {
29+
/// Constructing the webhook server failed; `source` carries the underlying [`WebhookError`].
#[snafu(display("failed to create webhook server"))]
30+
CreateWebhookServer { source: WebhookError },
31+
}
32+
33+
pub async fn create_webhook<'a>(
34+
operator_environment: &'a OperatorEnvironmentOptions,
35+
disable_mutating_webhook_configuration_maintenance: bool,
36+
client: Client,
37+
) -> Result<WebhookServer, Error> {
38+
let mutating_webhook_server = MutatingWebhookServer::new(
39+
get_mutating_webhook_configuration(),
40+
foo,
41+
disable_mutating_webhook_configuration_maintenance,
42+
client,
43+
FIELD_MANAGER.to_owned(),
44+
);
45+
46+
let webhook_options = WebhookOptions {
47+
socket_addr: WebhookServer::DEFAULT_SOCKET_ADDRESS,
48+
operator_namespace: operator_environment.operator_namespace.to_owned(),
49+
operator_service_name: operator_environment.operator_service_name.to_owned(),
50+
};
51+
WebhookServer::new(webhook_options, vec![Box::new(mutating_webhook_server)])
52+
.await
53+
.context(CreateWebhookServerSnafu)
54+
}
55+
56+
fn get_mutating_webhook_configuration() -> MutatingWebhookConfiguration {
57+
let webhook_name = "restarter-sts-enricher.stackable.tech";
58+
let metadata = ObjectMetaBuilder::new()
59+
.name(webhook_name)
60+
.with_label(Label::stackable_vendor())
61+
.with_label(
62+
Label::managed_by(OPERATOR_NAME, webhook_name).expect("static label is always valid"),
63+
)
64+
.build();
65+
66+
MutatingWebhookConfiguration {
67+
metadata,
68+
webhooks: Some(vec![MutatingWebhook {
69+
name: webhook_name.to_owned(),
70+
// This is checked by the stackable_webhook code
71+
admission_review_versions: vec!["v1".to_owned()],
72+
rules: Some(vec![RuleWithOperations {
73+
api_groups: Some(vec!["apps".to_owned()]),
74+
api_versions: Some(vec!["v1".to_owned()]),
75+
resources: Some(vec!["statefulsets".to_owned()]),
76+
operations: Some(vec!["CREATE".to_owned()]),
77+
scope: Some("Namespaced".to_owned()),
78+
}]),
79+
// We only need to care about StatefulSets with the `restarter.stackable.tech/enabled``
80+
// label set to `true`.
81+
object_selector: Some(LabelSelector {
82+
match_labels: Some(BTreeMap::from([(
83+
"restarter.stackable.tech/enabled".to_owned(),
84+
"true".to_owned(),
85+
)])),
86+
match_expressions: None,
87+
}),
88+
// Will be set by the stackable_webhook code
89+
client_config: WebhookClientConfig::default(),
90+
// Worst case the annotations are missing cause a restart of Pod 0, basically the same
91+
// behavior which we had for years.
92+
// See https://github.com/stackabletech/commons-operator/issues/111 for details
93+
// failure_policy: Some("Ignore".to_owned()),
94+
// TEMP for testing
95+
failure_policy: Some("Fail".to_owned()),
96+
// It could be the case that other mutating webhooks add more ConfigMpa/Secret mounts,
97+
// in which case it would be nice if we detect that.
98+
reinvocation_policy: Some("IfNeeded".to_owned()),
99+
// We don't have side effects
100+
side_effects: "None".to_owned(),
101+
..Default::default()
102+
}]),
103+
}
104+
}
105+
106+
fn foo(request: AdmissionRequest<StatefulSet>) -> AdmissionResponse {
107+
let Some(sts) = &request.object else {
108+
return AdmissionResponse::invalid(
109+
"object (of type StatefulSet) missing - for operation CREATE it must be always present",
110+
);
111+
};
112+
113+
dbg!(&request);
114+
115+
AdmissionResponse::from(&request)
116+
}

0 commit comments

Comments
 (0)