package com.hadoop.study;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author 戴桥冰 2015-10-6 10:33:54 AM
 * A Hadoop word count example.
 */
public class WordCount {
    /**
     * Mapper: emits <word, 1> for every word in the input.
     */
    static class MapperCount extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable intValue = new IntWritable(1);
        private Text text = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Read one line of input
            String lineValue = value.toString();
            // Split the line into words on whitespace. Note that split("")
            // would break the line into single characters; a StringTokenizer
            // over the line would work equally well here.
            String[] line = lineValue.trim().split("\\s+");
            // Emit <word, 1> for every word in the line
            for (int i = 0; i < line.length; i++) {
                // Skip the empty token produced by a blank line
                if (line[i].isEmpty()) {
                    continue;
                }
                text.set(line[i]);
                context.write(text, intValue);
            }
        }
    }
    /**
     * Reducer: sums the counts emitted for each word.
     */
    static class ReduceCount extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // Sum all counts for this word
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
    public static void main(String[] args) throws Exception {
        // Load the Hadoop configuration
        Configuration conf = new Configuration();
        // Create the job with the configuration and a job name
        // (Job.getInstance replaces the deprecated Job constructor)
        Job job = Job.getInstance(conf, "wordcount");
        // 1. Set the class that carries the job (used to locate the jar)
        job.setJarByClass(WordCount.class);
        // 2. Set the mapper and reducer classes
        job.setMapperClass(MapperCount.class);
        job.setReducerClass(ReduceCount.class);
        // 3. Set the input file and the output directory
        FileInputFormat.addInputPath(job, new Path("/opt/data/test.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/opt/data/hadoop/test/out"));
        // 4. Set the key/value types of the job output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5. Submit the job, wait for it to finish, and print progress on the client
        boolean isSuccess = job.waitForCompletion(true);
        // Exit with 0 on success, 1 on failure
        System.exit(isSuccess ? 0 : 1);
    }
}
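The driver above hard-codes both the input file and the output directory, and a MapReduce job fails if its output directory already exists. Below is a minimal sketch of an alternative driver, assuming the paths are passed on the command line (args[0] as input, args[1] as output) and an additional import of org.apache.hadoop.fs.FileSystem; the argument convention and the output-directory cleanup are illustrative additions, not part of the original code.

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Assumed convention: input path as args[0], output path as args[1]
        Path input = new Path(args[0]);
        Path output = new Path(args[1]);

        // Delete a leftover output directory so the job does not abort
        // (requires: import org.apache.hadoop.fs.FileSystem;)
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MapperCount.class);
        job.setReducerClass(ReduceCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

With this variant the job could be submitted as, for example, hadoop jar wordcount.jar com.hadoop.study.WordCount /opt/data/test.txt /opt/data/hadoop/test/out (the jar name here is only an example).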