Skip to content

Commit e53adbd

Browse files
authored
Features/add demos (#10)
1. Fix configurations that were introduced incorrectly in the last commit and caused the reduce process to fail. 2. Support a dataloader for cases where data needs to be uploaded to HDFS before the application runs.
1 parent cba6d82 commit e53adbd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+12586
-4386
lines changed

demos/mapreduce/pom.xml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Build descriptor for the WordCount MapReduce demo. -->
  <groupId>kubecluster.org</groupId>
  <artifactId>mapreduce</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>mapreduce</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Pin the Java level explicitly; otherwise the compiler plugin's default
         source/target is used, which newer JDKs may refuse to compile. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.3.6</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Produces an additional "jar-with-dependencies" artifact during the
           package phase, with WordCountDemo as the manifest main class. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <!-- Explicit version keeps the build reproducible across Maven releases. -->
        <version>3.6.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>kubecluster.org.WordCountDemo</mainClass>
            </manifest>
            <manifestEntries>
              <Class-Path>.</Class-Path>
            </manifestEntries>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package kubecluster.org;
2+
3+
import org.apache.hadoop.conf.Configuration;
4+
import org.apache.hadoop.fs.Path;
5+
import org.apache.hadoop.io.IntWritable;
6+
import org.apache.hadoop.io.Text;
7+
import org.apache.hadoop.mapreduce.job;
8+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
9+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
10+
import org.apache.hadoop.util.GenericOptionsParser;
11+
12+
public class WordCountDemo {
13+
14+
public static void main(String[] args) throws Exception {
15+
Configuration conf = new Configuration();
16+
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
17+
if (otherArgs.length < 2) {
18+
System.err.println("Usage: wordcount <in> [<in>...] <out>");
19+
System.exit(2);
20+
}
21+
Application application = Application.getInstance(conf, "word count");
22+
application.setJarByClass(WordCountDemo.class);
23+
application.setMapperClass(WordCountMapper.class);
24+
application.setCombinerClass(WordCountReducer.class);
25+
application.setReducerClass(WordCountReducer.class);
26+
application.setOutputKeyClass(Text.class);
27+
application.setOutputValueClass(IntWritable.class);
28+
for (int i = 0; i < otherArgs.length - 1; ++i) {
29+
FileInputFormat.addInputPath(application, new Path(otherArgs[i]));
30+
}
31+
FileOutputFormat.setOutputPath(application, new Path(otherArgs[otherArgs.length - 1]));
32+
System.exit(application.waitForCompletion(true) ? 0 : 1);
33+
}
34+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package kubecluster.org;
2+
import java.io.IOException;
3+
import java.util.StringTokenizer;
4+
5+
import org.apache.hadoop.io.IntWritable;
6+
import org.apache.hadoop.io.Text;
7+
import org.apache.hadoop.mapreduce.Mapper;
8+
9+
public class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
10+
11+
private final static IntWritable one = new IntWritable(1);
12+
private Text word = new Text();
13+
14+
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
15+
StringTokenizer itr = new StringTokenizer(value.toString());
16+
while (itr.hasMoreTokens()) {
17+
word.set(itr.nextToken());
18+
context.write(word, one);
19+
}
20+
}
21+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package kubecluster.org;
2+
3+
import java.io.IOException;
4+
5+
import org.apache.hadoop.io.IntWritable;
6+
import org.apache.hadoop.io.Text;
7+
import org.apache.hadoop.mapreduce.Reducer;
8+
9+
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
10+
11+
private IntWritable result = new IntWritable();
12+
13+
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
14+
int sum = 0;
15+
for (IntWritable val : values) {
16+
sum += val.get();
17+
}
18+
result.set(sum);
19+
context.write(key, result);
20+
}
21+
}

docs/api/kubecluster.org_v1alpha1_generated.asciidoc

Lines changed: 153 additions & 91 deletions
Large diffs are not rendered by default.

docs/quick-start.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ hadoopcluster-sample-resourcemanager 1/1 Running 0 6s 10.2
5050

5151
As you can see, there are Pods for the **NameNode**, DataNode, NodeManager, and ResourceManager. If you need more Pods to run more tasks, you can adjust the number of associated DataNodes and NodeManagers.
5252

53-
Now let's try logging in to a node and running a MapReduce job:
53+
Now let's try logging in to a node and running a MapReduce application:
5454
```bash
5555
> kubectl exec -it hadoopcluster-sample-resourcemanager /bin/bash
5656
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
@@ -59,8 +59,8 @@ LICENSE-binary LICENSE.txt NOTICE-binary NOTICE.txt README.txt bin etc in
5959
bash-4.2$ cd share/hadoop/mapreduce/
6060
bash-4.2$ ls
6161
hadoop-mapreduce-client-app-3.3.1.jar hadoop-mapreduce-client-hs-plugins-3.3.1.jar hadoop-mapreduce-client-shuffle-3.3.1.jar lib-examples
62-
hadoop-mapreduce-client-common-3.3.1.jar hadoop-mapreduce-client-jobclient-3.3.1-tests.jar hadoop-mapreduce-client-uploader-3.3.1.jar sources
63-
hadoop-mapreduce-client-core-3.3.1.jar hadoop-mapreduce-client-jobclient-3.3.1.jar hadoop-mapreduce-examples-3.3.1.jar
62+
hadoop-mapreduce-client-common-3.3.1.jar hadoop-mapreduce-client-jobclient-3.3.1-tests.jar hadoop-mapreduce-client-uploader-3.3.1.jar sources
63+
hadoop-mapreduce-client-core-3.3.1.jar hadoop-mapreduce-client-jobclient-3.3.1.jar hadoop-mapreduce-examples-3.3.1.jar
6464
hadoop-mapreduce-client-hs-3.3.1.jar hadoop-mapreduce-client-nativetask-3.3.1.jar jdiff
6565
bash-4.2$ hadoop jar hadoop-mapreduce-examples-3.3.1.jar pi 8 1000
6666
Number of Maps = 8
@@ -73,16 +73,16 @@ Wrote input for Map #4
7373
Wrote input for Map #5
7474
Wrote input for Map #6
7575
Wrote input for Map #7
76-
Starting Job
76+
Starting Job
7777
2024-01-11 08:28:41 INFO DefaultNoHARMFailoverProxyProvider:64 - Connecting to ResourceManager at hadoopcluster-sample-resourcemanager/10.244.0.101:8032
78-
2024-01-11 08:28:42 INFO JobResourceUploader:906 - Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hadoop/.staging/job_1704961336749_0001
78+
2024-01-11 08:28:42 INFO JobResourceUploader:906 - Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hadoop/.staging/job_1704961336749_0001
7979
2024-01-11 08:28:42 INFO FileInputFormat:300 - Total input files to process : 8
80-
2024-01-11 08:28:43 INFO JobSubmitter:202 - number of splits:8
81-
2024-01-11 08:28:43 INFO JobSubmitter:298 - Submitting tokens for job: job_1704961336749_0001
82-
2024-01-11 08:28:43 INFO JobSubmitter:299 - Executing with tokens: []
80+
2024-01-11 08:28:43 INFO JobSubmitter:202 - number of splits:8
81+
2024-01-11 08:28:43 INFO JobSubmitter:298 - Submitting tokens for job: job_1704961336749_0001
82+
2024-01-11 08:28:43 INFO JobSubmitter:299 - Executing with tokens: []
8383
......
8484
......
85-
Job Finished in 26.166 seconds
85+
Job Finished in 26.166 seconds
8686
Estimated value of Pi is 3.14100000000000000000
8787
bash-4.2$
8888
```

hack/violation_exception.list

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)