package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key, Text value, Context context ) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, ) throws IOException, InterruptedException { for (IntWritable val : values) { context.write(key, result); public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); 12/06/01 10:23:31 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
12/06/01 10:23:33 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
12/06/01 10:23:36 INFO input.FileInputFormat: Total input paths to process : 2
12/06/01 10:23:37 INFO mapred.JobClient: Running job: job_local_0001
12/06/01 10:23:37 INFO input.FileInputFormat: Total input paths to process : 2
12/06/01 10:23:37 INFO mapred.MapTask: io.sort.mb = 100
12/06/01 10:23:40 INFO mapred.MapTask: data buffer = 79691776/99614720
12/06/01 10:23:40 INFO mapred.MapTask: record buffer = 262144/327680
12/06/01 10:23:44 INFO mapred.JobClient:  map 0% reduce 0%
12/06/01 10:23:52 INFO mapred.MapTask: Starting flush of map output
12/06/01 10:23:59 INFO mapred.LocalJobRunner:
12/06/01 10:23:59 INFO mapred.MapTask: Finished spill 0
12/06/01 10:24:00 INFO mapred.JobClient:  map 100% reduce 0%
12/06/01 10:24:00 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
12/06/01 10:24:03 INFO mapred.LocalJobRunner:
12/06/01 10:24:03 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000000_0' done.
12/06/01 10:24:04 INFO mapred.MapTask: io.sort.mb = 100
12/06/01 10:24:08 INFO mapred.MapTask: data buffer = 79691776/99614720
12/06/01 10:24:08 INFO mapred.MapTask: record buffer = 262144/327680
12/06/01 10:24:10 INFO mapred.MapTask: Starting flush of map output
12/06/01 10:24:10 INFO mapred.MapTask: Finished spill 0
12/06/01 10:24:11 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000001_0 is done. And is in the process of commiting
12/06/01 10:24:12 INFO mapred.LocalJobRunner:
12/06/01 10:24:12 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000001_0' done.
12/06/01 10:24:14 INFO mapred.LocalJobRunner:
12/06/01 10:24:16 INFO mapred.Merger: Merging 2 sorted segments
12/06/01 10:24:17 INFO mapred.Merger: Down to the last merge-pass, with 2 segments left of total size: 77 bytes
12/06/01 10:24:17 INFO mapred.LocalJobRunner:
12/06/01 10:24:20 INFO mapred.TaskRunner: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
12/06/01 10:24:20 INFO mapred.LocalJobRunner:
12/06/01 10:24:20 INFO mapred.TaskRunner: Task attempt_local_0001_r_000000_0 is allowed to commit now
12/06/01 10:24:21 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/xsj/output
12/06/01 10:24:21 INFO mapred.LocalJobRunner: reduce > reduce
12/06/01 10:24:21 INFO mapred.TaskRunner: Task 'attempt_local_0001_r_000000_0' done.
12/06/01 10:24:22 INFO mapred.JobClient:  map 100% reduce 100%
12/06/01 10:24:22 INFO mapred.JobClient: Job complete: job_local_0001
12/06/01 10:24:22 INFO mapred.JobClient: Counters: 14
12/06/01 10:24:22 INFO mapred.JobClient:   FileSystemCounters
12/06/01 10:24:22 INFO mapred.JobClient:     FILE_BYTES_READ=50488
12/06/01 10:24:22 INFO mapred.JobClient:     HDFS_BYTES_READ=120
12/06/01 10:24:22 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=102748
12/06/01 10:24:22 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=41
12/06/01 10:24:22 INFO mapred.JobClient:   Map-Reduce Framework
12/06/01 10:24:22 INFO mapred.JobClient:     Reduce input groups=5
12/06/01 10:24:22 INFO mapred.JobClient:     Combine output records=6
12/06/01 10:24:22 INFO mapred.JobClient:     Map input records=4
12/06/01 10:24:22 INFO mapred.JobClient:     Reduce shuffle bytes=0
12/06/01 10:24:22 INFO mapred.JobClient:     Reduce output records=5
12/06/01 10:24:22 INFO mapred.JobClient:     Spilled Records=12
12/06/01 10:24:22 INFO mapred.JobClient:     Map output bytes=81
12/06/01 10:24:22 INFO mapred.JobClient:     Combine input records=8
12/06/01 10:24:22 INFO mapred.JobClient:     Map output records=8
12/06/01 10:24:22 INFO mapred.JobClient:     Reduce input records=6