Custom key
package test;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class ConsumeWritable implements WritableComparable<ConsumeWritable>{
private String name;
private float money;
public ConsumeWritable() {}
public ConsumeWritable(String name, float money) {
super();
this.name = name;
this.money = money;
}
// set() helper, following the pattern used by the built-in Writable types in the Hadoop source
public void set(String name,float money){
this.name=name;
this.money=money;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public float getMoney() {
return money;
}
public void setMoney(float money) {
this.money = money;
}
// serialization: write the fields in a fixed order
public void write(DataOutput out) throws IOException {
out.writeUTF(name);
out.writeFloat(money);
}
// deserialization: read the fields back in the same order they were written
public void readFields(DataInput in) throws IOException {
name=in.readUTF();
money=in.readFloat();
}
public int compareTo(ConsumeWritable o) {
// first comparison: by name
int cmp = this.getName().compareTo(o.getName());
if (cmp != 0) {
return cmp;
}
// second comparison: primitive types have no compareTo() method, so box the float in its wrapper class
return Float.valueOf(this.getMoney()).compareTo(Float.valueOf(o.getMoney()));
}
// comparing two objects for equality requires overriding equals() and hashCode()
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + Float.floatToIntBits(money);
result = prime * result + ((name == null) ? 0 : name.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ConsumeWritable other = (ConsumeWritable) obj;
if (Float.floatToIntBits(money) != Float.floatToIntBits(other.money))
return false;
if (name == null) {
if (other.name != null)
return false;
} else if (!name.equals(other.name))
return false;
return true;
}
@Override
public String toString() {
return name + "," + money;
}
}
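Because the shuffle phase orders records purely by the key's compareTo(), the ordering can be checked locally with plain Java, without running a job. A minimal sketch, assuming the ConsumeWritable class above is on the classpath; the demo class name and sample values are hypothetical:
package test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class ConsumeWritableSortDemo {
    public static void main(String[] args) {
        List<ConsumeWritable> keys = new ArrayList<ConsumeWritable>();
        keys.add(new ConsumeWritable("lisi", 30.5f));
        keys.add(new ConsumeWritable("zhangsan", 10.0f));
        keys.add(new ConsumeWritable("zhangsan", 5.5f));
        // WritableComparable extends Comparable, so Collections.sort uses compareTo()
        Collections.sort(keys);
        for (ConsumeWritable key : keys) {
            System.out.println(key); // lisi,30.5  zhangsan,5.5  zhangsan,10.0
        }
    }
}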
MapReduce program
package test;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Main idea: the shuffle phase sorts records by key, so the fields to sort on
* (name first, then money) are packed into a custom key.
* @author Administrator
*/
public class SecondSortMapReduce extends Configured implements Tool{
// map phase
public static class SecondSortMapper extends Mapper<LongWritable, Text, ConsumeWritable, FloatWritable>{
private ConsumeWritable mapOutPutKey = new ConsumeWritable();
private FloatWritable mapOutPutValue= new FloatWritable();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// convert the line that was read in to a String
String line = value.toString();
// split on the tab character
String[] split = line.split("\t");
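// example (hypothetical) input line: "zhangsan\t120.5" -> split[0] = "zhangsan", split[1] = "120.5"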
mapOutPutKey.set(split[0], Float.valueOf(split[1]));
mapOutPutValue.set(Float.parseFloat(split[1]));
context.write(mapOutPutKey, mapOutPutValue);
}
}
public static class SecondSortReducer extends Reducer<ConsumeWritable, FloatWritable, Text, FloatWritable>{
private Text outputKey = new Text();
private FloatWritable outputValue = new FloatWritable();
@Override
protected void reduce(ConsumeWritable key,
Iterable<FloatWritable> values,Context context)
throws IOException, InterruptedException {
outputKey.set(key.getName());
for (FloatWritable floatWritable : values) {
outputValue.set(floatWritable.get());
context.write(outputKey, outputValue);
}
}
}
public int run(String[] args) throws Exception {
// 1. create a Configuration object and load the configuration files
Configuration conf = new Configuration();
// 2. build the MapReduce Job object
Job job = Job.getInstance(conf, this.getClass().getSimpleName());
job.setJarByClass(getClass());
// 3. input directory/file (input) -> map -> reduce -> output path (output)
// 3.1 set the input path
Path inPath = new Path(args[0]);
FileInputFormat.setInputPaths(job, inPath);
// 3.2 set the map output classes
job.setMapperClass(SecondSortMapper.class);
job.setMapOutputKeyClass(ConsumeWritable.class);
job.setMapOutputValueClass(FloatWritable.class);
//custom partitioner (a sketch follows the program below)
//job.setPartitionerClass(NamePartitioner.class);
//custom grouping comparator (a sketch follows the program below)
//job.setGroupingComparatorClass(NameGroup.class);
// 3.3 set the reducer output classes
job.setReducerClass(SecondSortReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
// 3.4 set the output path (delete it first if it already exists)
Path outPath = new Path(args[1]);
FileSystem fs = outPath.getFileSystem(conf);
if (fs.exists(outPath)) {
fs.delete(outPath, true);
}
FileOutputFormat.setOutputPath(job, outPath);
// submit the job
/**
* waitForCompletion(true) prints detailed progress information; job.submit() cannot show this
*/
boolean isSuccess = job.waitForCompletion(true);
// job.submit(); not recommended
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
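// hard-coded test paths (they override any arguments passed on the command line)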
args = new String[] {
"hdfs://hive01:8020/input/ceshi.txt",
"hdfs://hive01:8020/outputtest"
};
int status = ToolRunner.run(conf, new SecondSortMapReduce(), args);
System.exit(status);
}
}
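The run() method leaves job.setPartitionerClass(NamePartitioner.class) and job.setGroupingComparatorClass(NameGroup.class) commented out, and those two classes are not shown above. For a full secondary sort (one reduce() call per name, with the money values arriving already sorted), they would typically partition and group on the name field only. A minimal sketch under that assumption; the actual NamePartitioner/NameGroup may differ:
package test;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.mapreduce.Partitioner;
// Hypothetical NamePartitioner: partition on the name field only, so every record
// for one consumer is routed to the same reduce task.
public class NamePartitioner extends Partitioner<ConsumeWritable, FloatWritable> {
    @Override
    public int getPartition(ConsumeWritable key, FloatWritable value, int numPartitions) {
        return (key.getName().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
And, in its own source file:
package test;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
// Hypothetical NameGroup: group on the name field only, so a single reduce() call
// receives all money values for one consumer, sorted by the full key (name, then money).
public class NameGroup extends WritableComparator {
    protected NameGroup() {
        super(ConsumeWritable.class, true); // true: create key instances for comparison
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        ConsumeWritable left = (ConsumeWritable) a;
        ConsumeWritable right = (ConsumeWritable) b;
        return left.getName().compareTo(right.getName());
    }
}
With these two classes registered via the commented-out lines, each reduce() call would cover one name; without them the job still runs, but every distinct (name, money) key forms its own reduce group.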