Month: June 2019

Hadoop Character Count example

Posted on

Referenced libraries:

Path: D:\hadoop-2.7.3\share\hadoop

Path: D:\hadoop-2.7.3\share\hadoop\mapreduce

hadoop-common-2.7.3.jar

log4j-1.2.17.jar

hadoop-mapreduce-client-core-2.7.3.jar

package com.abhay.demo;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


public class CharCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

	private Text word = new Text();
	
	int iCount=0;
	
	protected void map(LongWritable key, Text value, Context context)
			 throws IOException, InterruptedException 
	{
		
		
		String data=value.toString();
		
		StringTokenizer tokenizer=new StringTokenizer(data);
		
		while(tokenizer.hasMoreTokens()) 
		{
			
			word.set(tokenizer.nextToken());
			
			iCount= iCount + word.getLength();
			
			
		}
		
		word.set("Total");
		context.write(word,new IntWritable(iCount));
	}

}

 

package com.abhay.demo;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class CharCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
{

int cnt=0;

protected void reduce(Text key,Iterable<IntWritable> values,Reducer<Text,IntWritable,Text,IntWritable>.Context context)throws IOException,InterruptedException
{


for(IntWritable i:values)
{

cnt = cnt + i.get();


}
context.write(key,new IntWritable(cnt));
}
}

 

package com.abhay.demo;

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Command-line entry point that configures and submits the "CharCount" job,
 * wiring {@link CharCountMapper} and {@link CharCountReducer} together.
 */
public class CharCountDriver
{

    /**
     * Runs the job and terminates the JVM with a status reflecting the outcome:
     * {@code 0} on success, {@code 1} if the job fails or cannot be submitted,
     * {@code 2} on a usage error.
     *
     * <p>Accepted argument layouts:
     * <ul>
     *   <li>{@code <input path> <output path>}</li>
     *   <li>{@code <anything> <input path> <output path> ...} — with three or
     *       more arguments the paths are read from positions 1 and 2, exactly
     *       as before (presumably the first argument is the driver class name
     *       passed through by the launcher; confirm against how the jar is
     *       run).</li>
     * </ul>
     *
     * @param args command-line arguments as described above
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws ClassNotFoundException, InterruptedException
    {
        if (args.length < 2)
        {
            // Report on stderr and exit non-zero so scripts can detect the failure.
            System.err.println("Usage: CharCountDriver <input path> <output path>");
            System.exit(2);
        }

        // The guard used to allow two arguments while the code read args[1]
        // and args[2], which threw ArrayIndexOutOfBoundsException. With exactly
        // two arguments, treat them as <input> <output>; otherwise keep the
        // historical positions 1 and 2.
        int inputIndex = (args.length == 2) ? 0 : 1;
        String inputPath = args[inputIndex];
        String outputPath = args[inputIndex + 1];

        try
        {
            Job job = Job.getInstance();
            job.setJobName("CharCount");

            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            job.setJarByClass(CharCountDriver.class);
            job.setMapperClass(CharCountMapper.class);
            job.setReducerClass(CharCountReducer.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            int result = job.waitForCompletion(true) ? 0 : 1;
            System.out.println(job.isSuccessful());

            System.exit(result);
        }
        catch (IOException e)
        {
            // Previously the stack trace was printed and main returned
            // normally, so a failed submission still exited with status 0.
            e.printStackTrace();
            System.exit(1);
        }
    }

}