-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathMain
143 lines (114 loc) · 4.61 KB
/
Main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Matric Number:A0224917B
// Name:Wang Yutian
// FInalCommonWordstest.java
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.checkerframework.checker.units.qual.A;
/**
 * Hadoop MapReduce job that reports the most frequent words common to two input files,
 * skipping every word that also appears in a stop-word file.
 *
 * <p>The mapper tags each token with the name of the file it came from. The reducer counts,
 * per word, how often it occurs in each of the two input files; the word's score is the
 * smaller of the two counts. Words that occur in the stop-word file, or that are missing
 * from either input file, are discarded. At the end of the reduce phase the {@value #TOP_K}
 * highest-scoring words are written as {@code <score, word>} records, highest score first.
 *
 * <p>Files are recognised purely by their base name ({@value #FIRST_INPUT_FILE},
 * {@value #SECOND_INPUT_FILE} and {@value #STOPWORDS_FILE}); input files with other names
 * are read but contribute nothing to the result.
 */
public class TopkCommonWords {

    /** Maximum number of {@code <score, word>} records written to the output. */
    private static final int TOP_K = 20;

    /** Base name of the file whose tokens are treated as stop words. */
    private static final String STOPWORDS_FILE = "stopwords.txt";

    /** Base name of the first file being compared. */
    private static final String FIRST_INPUT_FILE = "task1-input1.txt";

    /** Base name of the second file being compared. */
    private static final String SECOND_INPUT_FILE = "task1-input2.txt";

    /** Token delimiter: any single whitespace character. Compiled once instead of per record. */
    private static final Pattern DELIMITER = Pattern.compile("[\\t\\n\\r\\f\\s]");

    /**
     * Emits one {@code <token, source file name>} pair for every non-empty token of the input.
     */
    public static class MyMapper
            extends Mapper<Object, Text, Text, Text> {

        // Writables are reused across records to avoid allocating per token.
        private final Text word = new Text();
        private final Text fileName = new Text();

        /**
         * Records the base name of the file backing this mapper's split.
         *
         * @param context task context; its input split is cast to {@link FileSplit}
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            // The name is constant for the whole split, so it is set once here.
            fileName.set(split.getPath().getName());
        }

        /**
         * Splits one input record on whitespace and emits each token with the file name.
         *
         * @param key     record key supplied by the input format (unused)
         * @param value   the text of the record
         * @param context sink for the {@code <token, file name>} pairs
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : DELIMITER.split(value.toString())) {
                // Consecutive or leading delimiters produce empty tokens; they are not words.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, fileName);
            }
        }
    }

    /**
     * Scores each word by its common frequency in the two input files and, once all words
     * have been seen, writes the {@value #TOP_K} best as {@code <score, word>} records.
     *
     * <p>The ranking is held in memory by this reducer instance, so it is only a global
     * ranking when the job runs with a single reduce task.
     */
    public static class MyReducer
            extends Reducer<Text, Text, IntWritable, Text> {

        /**
         * Words grouped by score. Each group is a list rather than a delimiter-joined string,
         * so a word that itself contains punctuation such as ',' is preserved intact.
         */
        private final TreeMap<Integer, List<String>> wordsByScore =
                new TreeMap<Integer, List<String>>();

        /**
         * Counts the occurrences of {@code key} per source file and remembers its score.
         *
         * @param key     the word
         * @param values  one source file name per occurrence of the word
         * @param context task context (nothing is written here; output happens in cleanup)
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int firstCount = 0;
            int secondCount = 0;
            for (Text value : values) {
                String source = value.toString();
                if (STOPWORDS_FILE.equals(source)) {
                    // A single occurrence in the stop-word file disqualifies the word.
                    return;
                } else if (FIRST_INPUT_FILE.equals(source)) {
                    firstCount++;
                } else if (SECOND_INPUT_FILE.equals(source)) {
                    secondCount++;
                }
            }

            // Only words present in both input files are "common".
            if (firstCount == 0 || secondCount == 0) {
                return;
            }
            String word = key.toString();
            if (word.isEmpty()) {
                return;
            }

            int score = Math.min(firstCount, secondCount);
            List<String> group = wordsByScore.get(score);
            if (group == null) {
                group = new ArrayList<String>();
                wordsByScore.put(score, group);
            }
            group.add(word);
        }

        /**
         * Writes at most {@value #TOP_K} {@code <score, word>} records, highest score first.
         * Words with equal scores are written in the order in which they reached
         * {@code reduce}.
         *
         * @param context sink for the output records
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            IntWritable score = new IntWritable();
            Text word = new Text();
            int emitted = 0;
            for (Map.Entry<Integer, List<String>> entry
                    : wordsByScore.descendingMap().entrySet()) {
                score.set(entry.getKey());
                for (String candidate : entry.getValue()) {
                    // Stop exactly at the limit, even in the middle of a group of ties.
                    if (emitted >= TOP_K) {
                        return;
                    }
                    word.set(candidate);
                    context.write(score, word);
                    emitted++;
                }
            }
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args three input paths followed by the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 4) {
            System.err.println("Usage: TopkCommonWords <input> <input> <input> <output>");
            System.exit(2);
        }

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(TopkCommonWords.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // The reducer ranks words in memory; more than one reduce task would yield
        // several independent partial rankings instead of one global ranking.
        job.setNumReduceTasks(1);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileInputFormat.addInputPath(job, new Path(args[2]));
        FileOutputFormat.setOutputPath(job, new Path(args[3]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}