Month: June 2019
Hadoop Character Count example
Referenced libraries:
Path: D:\hadoop-2.7.3\share\hadoop
Path: D:\hadoop-2.7.3\share\hadoop\mapreduce
hadoop-common-2.7.3.jar
log4j-1.2.17.jar
hadoop-mapreduce-client-core-2.7.3.jar
package com.abhay.demo;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class CharCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private Text word = new Text();
int iCount=0;
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String data=value.toString();
StringTokenizer tokenizer=new StringTokenizer(data);
while(tokenizer.hasMoreTokens())
{
word.set(tokenizer.nextToken());
iCount= iCount + word.getLength();
}
word.set("Total");
context.write(word,new IntWritable(iCount));
}
}
package com.abhay.demo;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class CharCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
{
int cnt=0;
protected void reduce(Text key,Iterable<IntWritable> values,Reducer<Text,IntWritable,Text,IntWritable>.Context context)throws IOException,InterruptedException
{
for(IntWritable i:values)
{
cnt = cnt + i.get();
}
context.write(key,new IntWritable(cnt));
}
}
package com.abhay.demo;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Entry point that configures and submits the "CharCount" job, wiring
 * {@link CharCountMapper} and {@link CharCountReducer} together.
 */
public class CharCountDriver {

    /**
     * Configures the job, runs it to completion and exits the JVM with
     * status 0 on success or 1 on any failure.
     *
     * @param args command-line arguments; {@code args[1]} is used as the input
     *             path and {@code args[2]} as the output path ({@code args[0]}
     *             is not read)
     * @throws ClassNotFoundException if thrown while waiting for the job to complete
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws ClassNotFoundException, InterruptedException {
        // Indices 1 and 2 are read below, so at least three arguments are
        // required; checking for fewer would let a two-argument call through
        // to an ArrayIndexOutOfBoundsException.
        if (args.length < 3) {
            System.err.println(
                    "Input/output path is missing: expected args[1] = input path, args[2] = output path");
            System.exit(1);
        }

        try {
            Job job = Job.getInstance();
            job.setJobName("CharCount");

            FileInputFormat.addInputPath(job, new Path(args[1]));
            FileOutputFormat.setOutputPath(job, new Path(args[2]));

            job.setJarByClass(CharCountDriver.class);
            job.setMapperClass(CharCountMapper.class);
            job.setReducerClass(CharCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            int result = job.waitForCompletion(true) ? 0 : 1;
            System.out.println(job.isSuccessful());
            System.exit(result);
        } catch (IOException e) {
            e.printStackTrace();
            // Report the failure through the exit status as well; returning
            // normally here would make the process exit with 0.
            System.exit(1);
        }
    }
}