
Commit f895c53

support submit hadoop job directly

1 parent 9870715


68 files changed (+3464, -1691 lines)

main.go

Lines changed: 21 additions & 7 deletions
@@ -18,8 +18,10 @@ package main

 import (
 	"flag"
-	"github.com/chriskery/hadoop-cluster-operator/pkg/config"
+	"fmt"
 	"os"
+
+	"github.com/chriskery/hadoop-cluster-operator/pkg/config"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"

@@ -55,6 +57,7 @@ func main() {
 	var enableLeaderElection bool
 	var probeAddr string
 	var certDir string
+	var enabledSchemes controllers.EnabledSchemes

 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
@@ -66,6 +69,8 @@ func main() {
 		config.HadoopInitContainerImageDefault, "The image for hadoop init container")
 	flag.StringVar(&config.Config.HadoopInitContainerTemplateFile, "hadoop-init-container-template-file",
 		config.HadoopInitContainerTemplateFileDefault, "The template file for hadoop init container")
+	flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=hadoopcluster --enable-scheme=hadoopjob, case insensitive."+
+		" By default, all supported schemes will be enabled.")

 	opts := zap.Options{
 		Development: true,
@@ -99,13 +104,22 @@ func main() {
 		os.Exit(1)
 	}

-	if err = controllers.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "hadoopCluster")
-		os.Exit(1)
+	// TODO: We need a general manager; every remaining reconciler should add itself to it.
+	// Based on the user configuration, we start different controllers.
+	if enabledSchemes.Empty() {
+		enabledSchemes.FillAll()
 	}
-	if err = (&hadoopclusterorgv1alpha1.HadoopCluster{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "hadoopCluster")
-		os.Exit(1)
+	for _, s := range enabledSchemes {
+		setupFunc, supported := controllers.SupportedSchemeReconciler[s]
+		if !supported {
+			setupLog.Error(fmt.Errorf("cannot find %s in supportedSchemeReconciler", s),
+				"scheme not supported", "scheme", s)
+			os.Exit(1)
+		}
+		if err = setupFunc(mgr); err != nil {
+			setupLog.Error(err, "unable to create controller", "controller", s)
+			os.Exit(1)
+		}
 	}
 	//+kubebuilder:scaffold:builder
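The diff references controllers.EnabledSchemes and controllers.SupportedSchemeReconciler without showing them. A minimal sketch of how such a registry could look, assuming EnabledSchemes is a flag.Value over a string slice; only the names Empty, FillAll, and SupportedSchemeReconciler come from the diff, everything else here (scheme names included) is an assumption:

package controllers

import (
	"strings"

	ctrl "sigs.k8s.io/controller-runtime"
)

// EnabledSchemes is a flag.Value that collects the schemes passed via --enable-scheme.
type EnabledSchemes []string

// String implements flag.Value.
func (es *EnabledSchemes) String() string { return strings.Join(*es, ",") }

// Set implements flag.Value; scheme names are matched case-insensitively.
func (es *EnabledSchemes) Set(v string) error {
	*es = append(*es, strings.ToLower(v))
	return nil
}

// Empty reports whether no scheme was requested explicitly.
func (es *EnabledSchemes) Empty() bool { return len(*es) == 0 }

// FillAll enables every registered scheme.
func (es *EnabledSchemes) FillAll() {
	for s := range SupportedSchemeReconciler {
		*es = append(*es, s)
	}
}

// SupportedSchemeReconciler maps a scheme name to the function that wires its
// reconciler (and webhook) into the manager. The setup bodies are placeholders.
var SupportedSchemeReconciler = map[string]func(ctrl.Manager) error{
	"hadoopcluster": func(mgr ctrl.Manager) error { return nil }, // placeholder setup
	"hadoopjob":     func(mgr ctrl.Manager) error { return nil }, // placeholder setup
}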

manifests/crd/bases/kubecluster.org_hadoopjobs.yaml

Lines changed: 2135 additions & 0 deletions (large diff not rendered)

manifests/crd/kustomization.yaml

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 # It should be run by manifests/default
 resources:
 - bases/kubecluster.org_hadoopclusters.yaml
+- bases/kubecluster.org_hadoopjobs.yaml
 #+kubebuilder:scaffold:crdkustomizeresource

 patchesStrategicMerge:

manifests/rbac/role.yaml

Lines changed: 26 additions & 0 deletions
@@ -95,3 +95,29 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - kubecluster.org
+  resources:
+  - hadoopjobs
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - kubecluster.org
+  resources:
+  - hadoopjobs/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - kubecluster.org
+  resources:
+  - hadoopjobs/status
+  verbs:
+  - get
+  - patch
+  - update
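Rules like these are normally generated by controller-gen from RBAC markers on the reconciler rather than written by hand. A sketch of the markers that would produce the three rule blocks above; the reconciler type name and the empty Reconcile body are assumptions:

package controllers

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
)

// HadoopJobReconciler is assumed; only the markers below map 1:1 to the rules above.
type HadoopJobReconciler struct{}

//+kubebuilder:rbac:groups=kubecluster.org,resources=hadoopjobs,verbs=create;delete;get;list;patch;update;watch
//+kubebuilder:rbac:groups=kubecluster.org,resources=hadoopjobs/finalizers,verbs=update
//+kubebuilder:rbac:groups=kubecluster.org,resources=hadoopjobs/status,verbs=get;patch;update
func (r *HadoopJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	return ctrl.Result{}, nil // reconcile logic elided; run `make manifests` to regenerate role.yaml
}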

manifests/samples/hadoop_job.yaml

Lines changed: 28 additions & 0 deletions
apiVersion: kubecluster.org/v1alpha1
kind: HadoopJob
metadata:
  name: hadoopjob-sample
spec:
  # mainApplicationFile is the path to a bundled JAR, Python, or R file of the application.
  # arguments is an optional list of arguments passed to the application.
  # executorSpec describes the executor nodes (a HadoopNodeSpec).
  mainApplicationFile: /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar
  arguments: ["pi", "10", "1000"]
  executorSpec:
    image: apache/hadoop:3
    replicas: 1
    resources:
      requests:
        cpu: 1
        memory: 4Gi
      limits:
        cpu: 1
        memory: 4Gi
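The sample maps directly onto the HadoopJobSpec added in this commit. For programmatic submission, a hedged sketch building the same object with the new Go types; the import path and the HadoopNodeSpec field names are assumptions inferred from the YAML keys:

package main

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	hadoopv1alpha1 "github.com/chriskery/hadoop-cluster-operator/pkg/apis/kubecluster.org/v1alpha1"
)

// sampleHadoopJob mirrors manifests/samples/hadoop_job.yaml.
func sampleHadoopJob() *hadoopv1alpha1.HadoopJob {
	replicas := int32(1)
	res := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("1"),
		corev1.ResourceMemory: resource.MustParse("4Gi"),
	}
	return &hadoopv1alpha1.HadoopJob{
		ObjectMeta: metav1.ObjectMeta{Name: "hadoopjob-sample"},
		Spec: hadoopv1alpha1.HadoopJobSpec{
			MainApplicationFile: "/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar",
			Arguments:           []string{"pi", "10", "1000"},
			ExecutorSpec: hadoopv1alpha1.HadoopNodeSpec{
				// Field names here are assumptions read off the YAML keys;
				// HadoopNodeSpec is defined elsewhere in the repository.
				Image:     "apache/hadoop:3",
				Replicas:  &replicas,
				Resources: corev1.ResourceRequirements{Requests: res, Limits: res},
			},
		},
	}
}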

manifests/webhook/manifests.yaml

Lines changed: 40 additions & 0 deletions
@@ -24,6 +24,26 @@ webhooks:
     resources:
     - hadoopclusters
   sideEffects: None
+- admissionReviewVersions:
+  - v1
+  clientConfig:
+    service:
+      name: webhook-service
+      namespace: system
+      path: /mutate-kubecluster-org-v1alpha1-hadoopjob
+  failurePolicy: Fail
+  name: mhadoopjob.kb.io
+  rules:
+  - apiGroups:
+    - kubecluster.org
+    apiVersions:
+    - v1alpha1
+    operations:
+    - CREATE
+    - UPDATE
+    resources:
+    - hadoopjobs
+  sideEffects: None
 ---
 apiVersion: admissionregistration.k8s.io/v1
 kind: ValidatingWebhookConfiguration
@@ -50,3 +70,23 @@ webhooks:
     resources:
     - hadoopclusters
   sideEffects: None
+- admissionReviewVersions:
+  - v1
+  clientConfig:
+    service:
+      name: webhook-service
+      namespace: system
+      path: /validate-kubecluster-org-v1alpha1-hadoopjob
+  failurePolicy: Fail
+  name: vhadoopjob.kb.io
+  rules:
+  - apiGroups:
+    - kubecluster.org
+    apiVersions:
+    - v1alpha1
+    operations:
+    - CREATE
+    - UPDATE
+    resources:
+    - hadoopjobs
+  sideEffects: None
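The /mutate-... and /validate-... paths above follow the kubebuilder naming convention, which suggests webhook methods on the HadoopJob type. A sketch of the markers and methods that would register these endpoints; the defaulting and validation bodies are assumptions, and the signatures follow recent controller-runtime (older versions return only error):

package v1alpha1

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

//+kubebuilder:webhook:path=/mutate-kubecluster-org-v1alpha1-hadoopjob,mutating=true,failurePolicy=fail,sideEffects=None,groups=kubecluster.org,resources=hadoopjobs,verbs=create;update,versions=v1alpha1,name=mhadoopjob.kb.io,admissionReviewVersions=v1
//+kubebuilder:webhook:path=/validate-kubecluster-org-v1alpha1-hadoopjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubecluster.org,resources=hadoopjobs,verbs=create;update,versions=v1alpha1,name=vhadoopjob.kb.io,admissionReviewVersions=v1

// SetupWebhookWithManager registers the mutating and validating webhooks.
func (j *HadoopJob) SetupWebhookWithManager(mgr ctrl.Manager) error {
	return ctrl.NewWebhookManagedBy(mgr).For(j).Complete()
}

// Default implements webhook.Defaulter; the defaulting logic is an assumption.
func (j *HadoopJob) Default() {}

// ValidateCreate implements webhook.Validator; the check shown is an assumption.
func (j *HadoopJob) ValidateCreate() (admission.Warnings, error) {
	if j.Spec.MainApplicationFile == "" {
		return nil, fmt.Errorf("spec.mainApplicationFile is required")
	}
	return nil, nil
}

// ValidateUpdate re-runs the create-time checks against the new object.
func (j *HadoopJob) ValidateUpdate(old runtime.Object) (admission.Warnings, error) {
	return j.ValidateCreate()
}

// ValidateDelete allows all deletions.
func (j *HadoopJob) ValidateDelete() (admission.Warnings, error) { return nil, nil }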

pkg/.DS_Store

6 KB binary file (not shown)

pkg/apis/kubecluster.org/v1alpha1/common_types.go

Lines changed: 2 additions & 0 deletions
@@ -9,4 +9,6 @@ const (
 	ReplicaTypeNodemanager ReplicaType = "nodemanager"

 	ReplicaTypeConfigMap ReplicaType = "configmap"
+
+	ReplicaTypeDriver ReplicaType = "driver"
 )

pkg/apis/kubecluster.org/v1alpha1 (new file; name not shown)

Lines changed: 156 additions & 0 deletions
/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
	// HadoopJobKind is the kind name.
	HadoopJobKind = "HadoopJob"
	// HadoopJobPlural is the plural for HadoopJob.
	HadoopJobPlural = "HadoopJobs"
	// HadoopJobSingular is the singular for HadoopJob.
	HadoopJobSingular = "HadoopJob"

	// JobNameLabel represents the label key for the job name; the value is the job name.
	JobNameLabel = "kubecluster.org/job-name"
)

// SparkApplicationType describes the type of a Spark application.
type SparkApplicationType string

// Different types of Spark applications.
const (
	JavaApplicationType   SparkApplicationType = "Java"
	ScalaApplicationType  SparkApplicationType = "Scala"
	PythonApplicationType SparkApplicationType = "Python"
	RApplicationType      SparkApplicationType = "R"
)

// HadoopJobSpec defines the desired state of HadoopJob.
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
type HadoopJobSpec struct {
	// MainApplicationFile is the path to a bundled JAR, Python, or R file of the application.
	MainApplicationFile string `json:"mainApplicationFile"`

	// Arguments is a list of arguments to be passed to the application.
	// +optional
	Arguments []string `json:"arguments,omitempty"`

	ExecutorSpec HadoopNodeSpec `json:"executorSpec,omitempty"`
}

// +k8s:openapi-gen=true
// +k8s:deepcopy-gen=true
// JobCondition describes the current state of a job.
type JobCondition struct {
	// Type of job condition.
	Type JobConditionType `json:"type"`
	// Status of the condition, one of True, False, Unknown.
	Status corev1.ConditionStatus `json:"status"`
	// The reason for the condition's last transition.
	Reason string `json:"reason,omitempty"`
	// A human-readable message indicating details about the transition.
	Message string `json:"message,omitempty"`
	// The last time this condition was updated.
	LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
	// Last time the condition transitioned from one status to another.
	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
}

type JobConditionType string

const (
	// JobCreated means the job has been accepted by the system,
	// but one or more of the pods/services has not been started.
	// This includes time before pods are scheduled and launched.
	JobCreated JobConditionType = "Created"

	// JobSubmitted means all sub-resources (e.g. services/pods) of this job
	// have been successfully submitted.
	JobSubmitted JobConditionType = "Submitted"

	// JobRunning means all sub-resources (e.g. services/pods) of this job
	// have been successfully scheduled and launched.
	// The job is running without error.
	JobRunning JobConditionType = "Running"

	// JobSucceeded means all sub-resources (e.g. services/pods) of this job
	// have terminated successfully.
	// The job completed without error.
	JobSucceeded JobConditionType = "Succeeded"

	// JobFailed means one or more sub-resources (e.g. services/pods) of this job
	// reached the failed phase with no restart.
	// The job has failed its execution.
	JobFailed JobConditionType = "Failed"
)

// +k8s:openapi-gen=true
// +k8s:deepcopy-gen=true
// HadoopJobStatus defines the observed state of HadoopJob.
// Important: Run "make" to regenerate code after modifying this file.
type HadoopJobStatus struct {
	// Conditions is an array of currently observed job conditions.
	Conditions []JobCondition `json:"conditions"`

	// StartTime represents the time when the job was acknowledged by the job controller.
	// It is not guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	StartTime *metav1.Time `json:"startTime,omitempty"`

	// CompletionTime represents the time when the job was completed. It is not guaranteed to
	// be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	CompletionTime *metav1.Time `json:"completionTime,omitempty"`
}

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=hadoopjobs
// +kubebuilder:object:root=true
// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date
// +kubebuilder:printcolumn:JSONPath=`.status.conditions[-1:].type`,name="State",type=string
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Namespaced,path=hadoopjobs,shortName={"hdj","hdjs"}
// HadoopJob is the Schema for the hadoopjobs API.
type HadoopJob struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   HadoopJobSpec   `json:"spec,omitempty"`
	Status HadoopJobStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=hadoopjobs

// HadoopJobList contains a list of HadoopJob.
type HadoopJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []HadoopJob `json:"items"`
}

func init() {
	SchemeBuilder.Register(&HadoopJob{}, &HadoopJobList{})
}
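The condition types above suggest a small helper the controller can use to record state transitions. This helper is not part of the commit, just a sketch written against the types defined in this file:

package v1alpha1

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// SetJobCondition records a condition on the status, updating the existing
// entry in place when the type is already present and bumping the transition
// time only when the condition's status value actually changes.
func SetJobCondition(status *HadoopJobStatus, condType JobConditionType,
	condStatus corev1.ConditionStatus, reason, message string) {
	now := metav1.Now()
	for i := range status.Conditions {
		if status.Conditions[i].Type != condType {
			continue
		}
		if status.Conditions[i].Status != condStatus {
			status.Conditions[i].LastTransitionTime = now
		}
		status.Conditions[i].Status = condStatus
		status.Conditions[i].Reason = reason
		status.Conditions[i].Message = message
		status.Conditions[i].LastUpdateTime = now
		return
	}
	status.Conditions = append(status.Conditions, JobCondition{
		Type:               condType,
		Status:             condStatus,
		Reason:             reason,
		Message:            message,
		LastUpdateTime:     now,
		LastTransitionTime: now,
	})
}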
