Skip to content

Commit e94e0e7

Browse files
authored
Hadoop job support (#8)
* Optimized creation logic * Optimized creation logic * make vet
1 parent 199361a commit e94e0e7

30 files changed

+1379
-205
lines changed

docs/api/kubecluster.org_v1alpha1_generated.asciidoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ HadoopJobSpec defines the desired state of HadoopJob NOTE: json tags are require
252252
| *`mainApplicationFile`* __string__ | MainFile is the path to a bundled JAR, Python, or R file of the application.
253253
| *`arguments`* __string array__ | Arguments is a list of arguments to be passed to the application.
254254
| *`executorSpec`* __xref:{anchor_prefix}-github-com-chriskery-hadoop-cluster-operator-pkg-apis-kubecluster-org-v1alpha1-hadoopnodespec[$$HadoopNodeSpec$$]__ |
255+
| *`nameNodeDirFormat`* __boolean__ |
256+
| *`env`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core[$$EnvVar$$] array__ | List of environment variables to set in the container. Cannot be updated.
255257
|===
256258

257259

@@ -291,6 +293,7 @@ HadoopJobStatus defines the observed state of HadoopJob
291293
[cols="25a,75a", options="header"]
292294
|===
293295
| Field | Description
296+
| *`env`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core[$$EnvVar$$] array__ | List of environment variables to set in the container. Cannot be updated.
294297
| *`replicas`* __integer__ | Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1.
295298
| *`image`* __string__ | Container image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.
296299
| *`volumeMounts`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core[$$VolumeMount$$] array__ | Pod volumes to mount into the container's filesystem. Cannot be updated.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ require (
1313
k8s.io/apimachinery v0.29.0
1414
k8s.io/client-go v0.29.0
1515
k8s.io/code-generator v0.29.0
16-
k8s.io/klog/v2 v2.110.1
1716
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00
1817
k8s.io/utils v0.0.0-20230726121419-3b25d923346b
1918
sigs.k8s.io/controller-runtime v0.16.3
@@ -77,6 +76,7 @@ require (
7776
k8s.io/apiextensions-apiserver v0.28.3 // indirect
7877
k8s.io/component-base v0.28.3 // indirect
7978
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect
79+
k8s.io/klog/v2 v2.110.1 // indirect
8080
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
8181
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
8282
sigs.k8s.io/yaml v1.3.0 // indirect

main.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package main
1919
import (
2020
"flag"
2121
"fmt"
22+
"github.com/sirupsen/logrus"
2223
"os"
2324

2425
"github.com/chriskery/hadoop-cluster-operator/pkg/config"
@@ -50,6 +51,10 @@ func init() {
5051

5152
utilruntime.Must(hadoopclusterorgv1alpha1.AddToScheme(scheme))
5253
//+kubebuilder:scaffold:scheme
54+
55+
logrus.SetFormatter(&logrus.TextFormatter{
56+
ForceColors: true,
57+
})
5358
}
5459

5560
func main() {
@@ -69,8 +74,8 @@ func main() {
6974
config.HadoopInitContainerImageDefault, "The image for hadoop init container")
7075
flag.StringVar(&config.Config.HadoopInitContainerTemplateFile, "hadoop-init-container-template-file",
7176
config.HadoopInitContainerTemplateFileDefault, "The template file for hadoop init container")
72-
flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=tfjob --enable-scheme=pytorchjob, case insensitive."+
73-
" Now supporting TFJob, PyTorchJob, MXNetJob, XGBoostJob, PaddleJob. By default, all supported schemes will be enabled.")
77+
flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=hadoopcluster --enable-scheme=hadoopjob, case insensitive."+
78+
" Now supporting HadoopCluster, HadoopJob. By default, all supported schemes will be enabled.")
7479

7580
opts := zap.Options{
7681
Development: true,

manifests/crd/bases/kubecluster.org_hadoopclusters.yaml

Lines changed: 484 additions & 0 deletions
Large diffs are not rendered by default.

manifests/crd/bases/kubecluster.org_hadoopjobs.yaml

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,233 @@ spec:
5252
items:
5353
type: string
5454
type: array
55+
env:
56+
description: List of environment variables to set in the container.
57+
Cannot be updated.
58+
items:
59+
description: EnvVar represents an environment variable present in
60+
a Container.
61+
properties:
62+
name:
63+
description: Name of the environment variable. Must be a C_IDENTIFIER.
64+
type: string
65+
value:
66+
description: 'Variable references $(VAR_NAME) are expanded using
67+
the previously defined environment variables in the container
68+
and any service environment variables. If a variable cannot
69+
be resolved, the reference in the input string will be unchanged.
70+
Double $$ are reduced to a single $, which allows for escaping
71+
the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the
72+
string literal "$(VAR_NAME)". Escaped references will never
73+
be expanded, regardless of whether the variable exists or
74+
not. Defaults to "".'
75+
type: string
76+
valueFrom:
77+
description: Source for the environment variable's value. Cannot
78+
be used if value is not empty.
79+
properties:
80+
configMapKeyRef:
81+
description: Selects a key of a ConfigMap.
82+
properties:
83+
key:
84+
description: The key to select.
85+
type: string
86+
name:
87+
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
88+
TODO: Add other useful fields. apiVersion, kind, uid?'
89+
type: string
90+
optional:
91+
description: Specify whether the ConfigMap or its key
92+
must be defined
93+
type: boolean
94+
required:
95+
- key
96+
type: object
97+
x-kubernetes-map-type: atomic
98+
fieldRef:
99+
description: 'Selects a field of the pod: supports metadata.name,
100+
metadata.namespace, `metadata.labels[''<KEY>'']`, `metadata.annotations[''<KEY>'']`,
101+
spec.nodeName, spec.serviceAccountName, status.hostIP,
102+
status.podIP, status.podIPs.'
103+
properties:
104+
apiVersion:
105+
description: Version of the schema the FieldPath is
106+
written in terms of, defaults to "v1".
107+
type: string
108+
fieldPath:
109+
description: Path of the field to select in the specified
110+
API version.
111+
type: string
112+
required:
113+
- fieldPath
114+
type: object
115+
x-kubernetes-map-type: atomic
116+
resourceFieldRef:
117+
description: 'Selects a resource of the container: only
118+
resources limits and requests (limits.cpu, limits.memory,
119+
limits.ephemeral-storage, requests.cpu, requests.memory
120+
and requests.ephemeral-storage) are currently supported.'
121+
properties:
122+
containerName:
123+
description: 'Container name: required for volumes,
124+
optional for env vars'
125+
type: string
126+
divisor:
127+
anyOf:
128+
- type: integer
129+
- type: string
130+
description: Specifies the output format of the exposed
131+
resources, defaults to "1"
132+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
133+
x-kubernetes-int-or-string: true
134+
resource:
135+
description: 'Required: resource to select'
136+
type: string
137+
required:
138+
- resource
139+
type: object
140+
x-kubernetes-map-type: atomic
141+
secretKeyRef:
142+
description: Selects a key of a secret in the pod's namespace
143+
properties:
144+
key:
145+
description: The key of the secret to select from. Must
146+
be a valid secret key.
147+
type: string
148+
name:
149+
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
150+
TODO: Add other useful fields. apiVersion, kind, uid?'
151+
type: string
152+
optional:
153+
description: Specify whether the Secret or its key must
154+
be defined
155+
type: boolean
156+
required:
157+
- key
158+
type: object
159+
x-kubernetes-map-type: atomic
160+
type: object
161+
required:
162+
- name
163+
type: object
164+
type: array
55165
executorSpec:
56166
properties:
167+
env:
168+
description: List of environment variables to set in the container.
169+
Cannot be updated.
170+
items:
171+
description: EnvVar represents an environment variable present
172+
in a Container.
173+
properties:
174+
name:
175+
description: Name of the environment variable. Must be a
176+
C_IDENTIFIER.
177+
type: string
178+
value:
179+
description: 'Variable references $(VAR_NAME) are expanded
180+
using the previously defined environment variables in
181+
the container and any service environment variables. If
182+
a variable cannot be resolved, the reference in the input
183+
string will be unchanged. Double $$ are reduced to a single
184+
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
185+
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
186+
Escaped references will never be expanded, regardless
187+
of whether the variable exists or not. Defaults to "".'
188+
type: string
189+
valueFrom:
190+
description: Source for the environment variable's value.
191+
Cannot be used if value is not empty.
192+
properties:
193+
configMapKeyRef:
194+
description: Selects a key of a ConfigMap.
195+
properties:
196+
key:
197+
description: The key to select.
198+
type: string
199+
name:
200+
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
201+
TODO: Add other useful fields. apiVersion, kind,
202+
uid?'
203+
type: string
204+
optional:
205+
description: Specify whether the ConfigMap or its
206+
key must be defined
207+
type: boolean
208+
required:
209+
- key
210+
type: object
211+
x-kubernetes-map-type: atomic
212+
fieldRef:
213+
description: 'Selects a field of the pod: supports metadata.name,
214+
metadata.namespace, `metadata.labels[''<KEY>'']`,
215+
`metadata.annotations[''<KEY>'']`, spec.nodeName,
216+
spec.serviceAccountName, status.hostIP, status.podIP,
217+
status.podIPs.'
218+
properties:
219+
apiVersion:
220+
description: Version of the schema the FieldPath
221+
is written in terms of, defaults to "v1".
222+
type: string
223+
fieldPath:
224+
description: Path of the field to select in the
225+
specified API version.
226+
type: string
227+
required:
228+
- fieldPath
229+
type: object
230+
x-kubernetes-map-type: atomic
231+
resourceFieldRef:
232+
description: 'Selects a resource of the container: only
233+
resources limits and requests (limits.cpu, limits.memory,
234+
limits.ephemeral-storage, requests.cpu, requests.memory
235+
and requests.ephemeral-storage) are currently supported.'
236+
properties:
237+
containerName:
238+
description: 'Container name: required for volumes,
239+
optional for env vars'
240+
type: string
241+
divisor:
242+
anyOf:
243+
- type: integer
244+
- type: string
245+
description: Specifies the output format of the
246+
exposed resources, defaults to "1"
247+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
248+
x-kubernetes-int-or-string: true
249+
resource:
250+
description: 'Required: resource to select'
251+
type: string
252+
required:
253+
- resource
254+
type: object
255+
x-kubernetes-map-type: atomic
256+
secretKeyRef:
257+
description: Selects a key of a secret in the pod's
258+
namespace
259+
properties:
260+
key:
261+
description: The key of the secret to select from. Must
262+
be a valid secret key.
263+
type: string
264+
name:
265+
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
266+
TODO: Add other useful fields. apiVersion, kind,
267+
uid?'
268+
type: string
269+
optional:
270+
description: Specify whether the Secret or its key
271+
must be defined
272+
type: boolean
273+
required:
274+
- key
275+
type: object
276+
x-kubernetes-map-type: atomic
277+
type: object
278+
required:
279+
- name
280+
type: object
281+
type: array
57282
hostNetwork:
58283
description: Host networking requested for this pod. Use the host's
59284
network namespace. If this option is set, the ports that will
@@ -2072,6 +2297,8 @@ spec:
20722297
description: MainFile is the path to a bundled JAR, Python, or R file
20732298
of the application.
20742299
type: string
2300+
nameNodeDirFormat:
2301+
type: boolean
20752302
required:
20762303
- mainApplicationFile
20772304
type: object

manifests/samples/hadoop_job.yaml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,9 @@ kind: HadoopJob
33
metadata:
44
name: hadoopjob-sample
55
spec:
6-
# // MainFile is the path to a bundled JAR, Python, or R file of the application.
7-
# MainApplicationFile string `json:"mainApplicationFile"`
8-
#
9-
# // Arguments is a list of arguments to be passed to the application.
10-
# // +optional
11-
# Arguments []string `json:"arguments,omitempty"`
12-
#
13-
# ExecutorSpec HadoopNodeSpec `json:"executorSpec,omitempty"`
146
mainApplicationFile: /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar
157
arguments: ["pi","10","1000"]
8+
nameNodeDirFormat: true
169
executorSpec:
1710
image: apache/hadoop:3
1811
replicas: 1

pkg/apis/kubecluster.org/v1alpha1/hadoopcluster_types.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ const (
4141
)
4242

4343
type HadoopNodeSpec struct {
44+
// List of environment variables to set in the container.
45+
// Cannot be updated.
46+
// +optional
47+
// +patchMergeKey=name
48+
// +patchStrategy=merge
49+
Env []corev1.EnvVar `json:"env,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,7,rep,name=env"`
4450
// Number of desired pods. This is a pointer to distinguish between explicit
4551
// zero and not specified. Defaults to 1.
4652
// +optional
@@ -183,11 +189,9 @@ const (
183189
// The training is running without error.
184190
ClusterRunning ClusterConditionType = "Running"
185191

186-
// ClusterRestarting means one or more sub-resources (e.g. services/pods) of this job
187-
// reached phase failed but maybe restarted according to it's restart policy
188-
// which specified by user in v1.PodTemplateSpec.
189-
// The training is freezing/pending.
190-
ClusterRestarting ClusterConditionType = "Restarting"
192+
// ClusterReconfiguring means one or more sub-resources (e.g. datanode/namenode) of this cluster
193+
// are applying a configuration change caused by a change in replicas.
194+
ClusterReconfiguring ClusterConditionType = "Reconfiguring"
191195
)
192196

193197
// +k8s:openapi-gen=true

pkg/apis/kubecluster.org/v1alpha1/hadoopjob_types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ type HadoopJobSpec struct {
5555
Arguments []string `json:"arguments,omitempty"`
5656

5757
ExecutorSpec HadoopNodeSpec `json:"executorSpec,omitempty"`
58+
59+
NameNodeDirFormat bool `json:"nameNodeDirFormat,omitempty"`
60+
61+
// List of environment variables to set in the container.
62+
// Cannot be updated.
63+
// +optional
64+
// +patchMergeKey=name
65+
// +patchStrategy=merge
66+
Env []corev1.EnvVar `json:"env,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,7,rep,name=env"`
5867
}
5968

6069
// +k8s:openapi-gen=true

0 commit comments

Comments
 (0)