案例1:统计手机号的 总上行流量 总下行流量 总流量
--------------------------------------------------------
手机号 上行流量 下行流量
13726230501 200 1100
13396230502 300 1200
13396230502 320 500
13396230502 100 3200
13897230503 400 1300
13897230503 100 300
13597230534 500 1400
13597230534 300 1200
统计结果如下:
------------------------------------------------
手机号 总上行流量 总下行流量 总流量
13726230501 200 1100 1300
13396230502 720 4900 5620
.............
/**
 * Writable entity holding the traffic figures of one record: upstream flow,
 * downstream flow and total flow.
 *
 * <p>The serialized form is the three {@link IntWritable} fields written in the
 * order upper, lower, total; {@link #readFields(DataInput)} reads them back in
 * that same order.
 */
public class FlowLog implements WritableComparable<FlowLog>{
    private IntWritable upperflow;
    private IntWritable lowerflow;
    private IntWritable totalflow;

    /** Creates an instance whose three fields are freshly allocated, default-valued writables. */
    public FlowLog() {
        upperflow = new IntWritable();
        lowerflow = new IntWritable();
        totalflow = new IntWritable();
    }

    /**
     * Creates an instance that stores the given writables by reference (no copy is made).
     *
     * @param upperflow upstream flow
     * @param lowerflow downstream flow
     * @param totalflow total flow
     */
    public FlowLog(IntWritable upperflow, IntWritable lowerflow, IntWritable totalflow) {
        this.upperflow = upperflow;
        this.lowerflow = lowerflow;
        this.totalflow = totalflow;
    }

    public IntWritable getUpperflow() {
        return upperflow;
    }

    public void setUpperflow(IntWritable upperflow) {
        this.upperflow = upperflow;
    }

    public IntWritable getLowerflow() {
        return lowerflow;
    }

    public void setLowerflow(IntWritable lowerflow) {
        this.lowerflow = lowerflow;
    }

    public IntWritable getTotalflow() {
        return totalflow;
    }

    public void setTotalflow(IntWritable totalflow) {
        this.totalflow = totalflow;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((lowerflow == null) ? 0 : lowerflow.hashCode());
        result = prime * result + ((totalflow == null) ? 0 : totalflow.hashCode());
        result = prime * result + ((upperflow == null) ? 0 : upperflow.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        FlowLog other = (FlowLog) obj;
        if (lowerflow == null) {
            if (other.lowerflow != null) {
                return false;
            }
        } else if (!lowerflow.equals(other.lowerflow)) {
            return false;
        }
        if (totalflow == null) {
            if (other.totalflow != null) {
                return false;
            }
        } else if (!totalflow.equals(other.totalflow)) {
            return false;
        }
        if (upperflow == null) {
            if (other.upperflow != null) {
                return false;
            }
        } else if (!upperflow.equals(other.upperflow)) {
            return false;
        }
        return true;
    }

    /**
     * Serializes this object, which amounts to serializing its three fields
     * in the order upper, lower, total.
     *
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        upperflow.write(out);
        lowerflow.write(out);
        totalflow.write(out);
    }

    /**
     * Deserializes the three fields from the stream, in the same order
     * {@link #write(DataOutput)} produced them.
     *
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        upperflow.readFields(in);
        lowerflow.readFields(in);
        totalflow.readFields(in);
    }

    /**
     * Orders by total flow, then upstream flow, then downstream flow (all ascending).
     *
     * <p>Previously this returned 0 for every pair, which contradicted
     * {@link #equals(Object)}; the result is now 0 exactly when the three
     * fields match pairwise.
     */
    @Override
    public int compareTo(FlowLog o) {
        int cmp = compareNullable(totalflow, o.totalflow);
        if (cmp != 0) {
            return cmp;
        }
        cmp = compareNullable(upperflow, o.upperflow);
        if (cmp != 0) {
            return cmp;
        }
        return compareNullable(lowerflow, o.lowerflow);
    }

    /** Compares two possibly-null writables by value; a null sorts before any non-null value. */
    private static int compareNullable(IntWritable a, IntWritable b) {
        if (a == null || b == null) {
            return (a == null ? 0 : 1) - (b == null ? 0 : 1);
        }
        return Integer.compare(a.get(), b.get());
    }

    /**
     * Must be overridden: returns the three fields separated by tabs.
     * NOTE(review): presumably this is the text written for each output record — confirm
     * against the output format the job uses.
     */
    @Override
    public String toString() {
        return upperflow + "\t" + lowerflow + "\t" + totalflow;
    }
}
/**
 * Parses one whitespace-separated input line of the form
 * {@code <phone> <upstream> <downstream>} and emits {@code <phone, FlowLog>}.
 *
 * <p>Blank lines are ignored. Lines with fewer than three tokens, or whose flow
 * columns are not integers (for example a column-header line), are skipped and
 * counted instead of failing the task.
 */
public class FlowLogMapper extends Mapper<LongWritable, Text, Text, FlowLog> {
    /** Counter group / name used for skipped input lines. */
    private static final String COUNTER_GROUP = "FlowLogMapper";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    // Output objects are reused across map() calls to avoid per-record allocation.
    private final FlowLog log = new FlowLog();
    private final IntWritable upperflow = new IntWritable();
    private final IntWritable lowerflow = new IntWritable();
    private final Text phone = new Text();

    /**
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context receives the {@code <phone, FlowLog>} pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        StringTokenizer st = new StringTokenizer(value.toString());
        if (!st.hasMoreTokens()) {
            return; // blank line: nothing to emit
        }
        if (st.countTokens() < 3) {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }
        String phoneToken = st.nextToken();
        int upper;
        int lower;
        try {
            upper = Integer.parseInt(st.nextToken());
            lower = Integer.parseInt(st.nextToken());
        } catch (NumberFormatException e) {
            // Non-numeric flow column, e.g. the header row of the data file.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }
        upperflow.set(upper);
        lowerflow.set(lower);
        log.setUpperflow(upperflow);
        log.setLowerflow(lowerflow);
        // totalflow is intentionally left untouched here, exactly as before.
        phone.set(phoneToken);
        context.write(phone, log);
    }
}
public class FlowLogReducer extends Reducer<Text, FlowLog, Text, FlowLog>{
@Override
protected void reduce(Text key, Iterable<FlowLog> vs, Context context)
throws IOException, InterruptedException {
int upper = 0;
int lower = 0;
int total = 0;
for(FlowLog log:vs) {
upper += log.getUpperflow().get();
lower += log.getLowerflow().get();
}
total = upper + lower;
FlowLog log = new FlowLog(new IntWritable(upper), new IntWritable(lower), new IntWritable(total));
context.write(key, log);
}
} //定义驱动类 public class FlowDriver {
public static void main(String[] args) throws Exception {
Job job = Job.getInstance();
job.setJobName(" flow");
job.setJarByClass(FlowDriver.class);
job.setMapperClass(FlowLogMapper.class);
job.setReducerClass(FlowLogReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FlowLog.class);
FileInputFormat.addInputPath(job, new Path("file:///D:/data/上行下行/*"));
FileOutputFormat.setOutputPath(job, new Path("file:///D:/flow2"));
System.exit(job.waitForCompletion(true)?0:1);
}
}
案例2:二次排序: 先按照第一个字段排序,
如果第一个字段相同,再按照第二个字段排序
账号(Account) 金额(price)
hadoop@apache 200
hive@apache 550
yarn@apache 580
hive@apache 159
hadoop@apache 300
hadoop@apache 300
hive@apache 258
hadoop@apache 300
yarn@apache 100
hadoop@apache 150
yarn@apache 560
yarn@apache 260
统计结果为:
hadoop@apache 150
hadoop@apache 200
hadoop@apache 300
hadoop@apache 300
hadoop@apache 300
hive@apache 159
hive@apache 258
hive@apache 550
yarn@apache 100
yarn@apache 260
yarn@apache 560
yarn@apache 580
思路:
定义一个类型AccountBean:封装 账号(Account)和金额(money)
<k1,v1> <偏移量,hadoop@apache 150>
<k2,v2> --><AccountBean,NullWritable> <k3,v3> --><AccountBean,NullWritable>
context.write(AccountBean,NullWritable.get())
/**
 * Composite key for the secondary-sort example: an account name plus an amount.
 *
 * <p>Natural order is by account first and, for equal accounts, by amount
 * ascending. The serialized form is the account followed by the amount.
 */
public class AccountBean implements WritableComparable<AccountBean>{
    private Text account;
    private IntWritable money;

    /** Creates a bean with an empty account and a default-valued amount. */
    public AccountBean() {
        account = new Text();
        money = new IntWritable();
    }

    /** Creates a bean that stores the given writables by reference (no copy is made). */
    public AccountBean(Text account, IntWritable money) {
        this.account = account;
        this.money = money;
    }

    /** Creates a bean from plain values; see {@link #set(String, int)}. */
    public AccountBean(String account, int money) {
        set(account,money);
    }

    /**
     * Replaces both fields with newly allocated writables holding the given values.
     *
     * @param account account name
     * @param money   amount
     */
    public void set(String account,int money) {
        this.account = new Text(account);
        this.money = new IntWritable(money);
    }

    public Text getAccount() {
        return account;
    }

    public void setAccount(Text account) {
        this.account = account;
    }

    public IntWritable getMoney() {
        return money;
    }

    public void setMoney(IntWritable money) {
        this.money = money;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((account == null) ? 0 : account.hashCode());
        result = prime * result + ((money == null) ? 0 : money.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        AccountBean other = (AccountBean) obj;
        if (account == null) {
            if (other.account != null) {
                return false;
            }
        } else if (!account.equals(other.account)) {
            return false;
        }
        if (money == null) {
            if (other.money != null) {
                return false;
            }
        } else if (!money.equals(other.money)) {
            return false;
        }
        return true;
    }

    /** Writes the account, then the amount. */
    @Override
    public void write(DataOutput out) throws IOException {
        account.write(out);
        money.write(out);
    }

    /** Reads the account, then the amount, mirroring {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        account.readFields(in);
        money.readFields(in);
    }

    /**
     * Compares by account, then by amount in ascending order.
     *
     * <p>The amount comparison used to be negated (descending), which contradicted
     * the ascending expected result listed for this example.
     */
    @Override
    public int compareTo(AccountBean o) {
        int temp = this.account.compareTo(o.getAccount());
        if (temp != 0) {
            return temp;
        }
        // Same account: fall back to the amount, smallest first.
        return this.money.compareTo(o.getMoney());
    }

    /** Returns the account and the amount separated by a tab. */
    @Override
    public String toString() {
        return account + "\t" + money;
    }
}
/**
 * Parses one whitespace-separated line of the form {@code <account> <amount>}
 * and emits {@code <AccountBean, NullWritable>}.
 *
 * <p>Nothing is emitted for a line that cannot be parsed. Previously the write
 * happened unconditionally, so a blank line re-emitted whatever the reused bean
 * still held from the preceding record.
 */
public class AccountMapper extends Mapper<LongWritable, Text, AccountBean, NullWritable>{
    /** Counter group / name used for skipped input lines. */
    private static final String COUNTER_GROUP = "AccountMapper";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    // Reused across map() calls.
    private AccountBean accountbean = new AccountBean();

    /**
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context receives the parsed bean as key with a {@link NullWritable} value
     */
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, AccountBean, NullWritable>.Context context)
            throws IOException, InterruptedException {
        StringTokenizer st = new StringTokenizer(value.toString());
        if (!st.hasMoreTokens()) {
            return; // blank line: nothing to emit
        }
        if (st.countTokens() < 2) {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }
        String str = st.nextToken();
        int m;
        try {
            m = Integer.parseInt(st.nextToken());
        } catch (NumberFormatException e) {
            // Non-numeric amount column, e.g. the header row of the data file.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }
        accountbean.set(str, m);
        context.write(accountbean, NullWritable.get());
    }
}
/**
 * Pass-through reducer: writes the key once for every value in its group, so
 * repeated records appear in the output as many times as they were received.
 */
public class AccountReducer extends Reducer<AccountBean, NullWritable, AccountBean, NullWritable>{
    /**
     * @param key     the current key
     * @param vs      the values of the group; only their number matters
     * @param context receives {@code <key, NullWritable>} once per value
     */
    @Override
    protected void reduce(AccountBean key, Iterable<NullWritable> vs,Context context) throws IOException, InterruptedException {
        final NullWritable nothing = NullWritable.get();
        for (NullWritable ignored : vs) {
            context.write(key, nothing);
        }
    }
}
/**
 * Driver for the secondary-sort example: wires {@link AccountMapper} and
 * {@link AccountReducer} into a job and runs it.
 */
public class AccountDriver {
    /**
     * Configures the job, waits for it to finish, and exits with status 0 on
     * success or 1 on failure.
     *
     * @param args command-line arguments (not used; input and output paths are fixed)
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJobName(" account");
        job.setJarByClass(AccountDriver.class);

        // Processing classes.
        job.setMapperClass(AccountMapper.class);
        job.setReducerClass(AccountReducer.class);

        // Output key/value types.
        job.setOutputKeyClass(AccountBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Input and output locations.
        FileInputFormat.addInputPath(job, new Path("file:///D:/data/二次排序/*"));
        FileOutputFormat.setOutputPath(job, new Path("file:///D:/secondsort1"));

        boolean succeeded = job.waitForCompletion(true);
        int exitCode;
        if (succeeded) {
            exitCode = 0;
        } else {
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}
【自定义比较器】:属于优化方案(视情况而定,是否需要)
/**
 * Custom raw comparator for {@link AccountBean} keys, intended as an optimization:
 * it compares the serialized bytes directly instead of deserializing the keys.
 *
 * <p>Order: account first, then amount ascending.
 * NOTE(review): if this is registered as the job's sort comparator it has to
 * produce the same order as {@code AccountBean.compareTo} — confirm.
 *
 * @author Michael
 */
public class AccountComparator extends WritableComparator{
    private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
    private static final IntWritable.Comparator INTWRITABLE_COMPARATOR = new IntWritable.Comparator();

    /** Registers {@link AccountBean} as the key class handled by this comparator. */
    public AccountComparator() {
        super(AccountBean.class);
    }

    /**
     * Compares two serialized keys.
     *
     * @param b1 buffer holding the first key
     * @param s1 offset of the first key in {@code b1}
     * @param l1 length in bytes of the first key
     * @param b2 buffer holding the second key
     * @param s2 offset of the second key in {@code b2}
     * @param l2 length in bytes of the second key
     * @return negative, zero or positive as the first key sorts before, with or after the second
     * @throws IllegalArgumentException if the length prefix of either account cannot be decoded
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        try {
            // Bytes taken by the account: its variable-length size prefix plus the text itself.
            int firstl1 = WritableUtils.decodeVIntSize(b1[s1])+readVInt(b1,s1);
            int firstl2 = WritableUtils.decodeVIntSize(b2[s2])+readVInt(b2,s2);
            int temp = TEXT_COMPARATOR.compare(b1, s1, firstl1, b2, s2, firstl2);
            if (temp != 0) {
                return temp;
            }
            // Same account: compare the amount stored right after it.
            return INTWRITABLE_COMPARATOR.compare(b1, s1+firstl1, l1-firstl1, b2, s2+firstl2, l2-firstl2);
        } catch (java.io.IOException e) {
            // Do not report undecodable keys as "equal"; surface the failure with its cause.
            throw new IllegalArgumentException(e);
        }
    }
}
分组比较器(辅助排序):
/**
 * Grouping comparator (auxiliary sort): treats two {@link AccountBean} keys as
 * equal whenever their accounts are equal, ignoring the amount.
 */
public class AccountGroupComparator extends WritableComparator{
    /** Registers {@link AccountBean} as the key class and asks the base class to create key instances. */
    public AccountGroupComparator() {
        super(AccountBean.class,true);
    }

    /**
     * @param a first key, expected to be an {@link AccountBean}
     * @param b second key, expected to be an {@link AccountBean}
     * @return the result of comparing the two accounts
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return ((AccountBean) a).getAccount().compareTo(((AccountBean) b).getAccount());
    }
}