2020大数据数据处理综合练习
1.数据的预处理阶段
Mapper:
public class Mapper02 extends Mapper<LongWritable, Text,LongWritable,Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//每行按:切分
String[] line = value.toString().split(":");
//只处理切分后字段数不少于10的记录
if (line.length>=10){
//处理 视频类别
//调用Util方法切分重组 视频类别
String Category=Util.replacedata(line[3]);
//处理相关视频id
String RelatedIds="";
//相关视频id数量=1 不需要切分重组
if (line.length==10){
RelatedIds=line[9];
//相关视频id数量>1
}else if (line.length>10){
String newRelatedId="";
//从第一个相关视频id遍历重组
for (int i = 9; i < line.length; i++) {
newRelatedId+=line[i]+",";
}
RelatedIds=newRelatedId.substring(0,newRelatedId.lastIndexOf(","));
}
//处理整段数据
//第一次处理:将原视频类别替换
String data=value.toString().replace(line[3],Category);