package com.**.pcdnas.mr.job;
import com.alibaba.fastjson.JSON;
import com.**.pcdnas.mr.model.LogInfo;
import com.**.pcdnas.mr.util.MrCommonUtil;
import com.**.pcdnas.mr.util.StringUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import java.io.IOException;
/**
 * Mapper and reducer that regroup open-peer log lines into per-key output files.
 *
 * <p>The mapper tags every JSON log line with the key returned by
 * {@code MrCommonUtil.getOpenPeerMapKey}; the reducer writes each line, unchanged, to the
 * {@code "MOSText"} named output under a base path built from that key.
 *
 * <p>Created 2018/11/8.
 */
public class OpenPeerMR {

    /**
     * Keys each input line by the value derived from its parsed {@link LogInfo}.
     *
     * <p>Of the four type parameters, the first two are the mapper's input key/value types
     * (KEYIN, VALUEIN) and the last two are its output key/value types.
     */
    public static class OpenPeerMapper extends Mapper<Object, Text, Text, Text> {

        /** Output key, reused across {@code map} calls to avoid allocating one per record. */
        private final Text outKey = new Text();

        /**
         * Parses one log line as JSON and emits it under its grouping key.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one log line containing a JSON document; written out unchanged
         * @param context task context that receives the (key, line) pair
         * @throws IOException          if the context fails to write
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String peerLog = value.toString().trim();
            LogInfo logInfo = JSON.parseObject(peerLog, LogInfo.class);
            outKey.set(MrCommonUtil.getOpenPeerMapKey(logInfo));
            // The original (untrimmed) line is forwarded; only the key is derived from the parse.
            context.write(outKey, value);
        }
    }

    /**
     * Writes every grouped log line to the {@code "MOSText"} named output.
     *
     * <p>The reducer pulls what the mapper produced, so its KEYIN/VALUEIN match the mapper's
     * KEYOUT/VALUEOUT.
     */
    public static class OpenPeerReducer extends Reducer<Text, Text, Text, Text> {

        /** Named output every record is written to. */
        private static final String NAMED_OUTPUT = "MOSText";

        /** Number of comma-separated fields a well-formed key has: company id, token, date. */
        private static final int KEY_FIELD_COUNT = 3;

        /** Placeholder path segments used when the key does not have exactly three fields. */
        private static final String FALLBACK_COMPANY_ID = "ip";
        private static final String FALLBACK_TOKEN = "token";
        private static final String FALLBACK_DATE = "yyyyMMdd-HH";

        /** Multi-file writer; created in {@link #setup} and closed in {@link #cleanup}. */
        private MultipleOutputs<Text, Text> outputs;

        /**
         * Creates the {@link MultipleOutputs} writer bound to this task's context.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            outputs = new MultipleOutputs<Text, Text>(context);
        }

        /**
         * Writes all lines of one key group to a base path derived from the key.
         *
         * <p>The base path is
         * {@code companyId/token/<StringUtil.getUUIDFromToken(token)>_<date>}.
         *
         * @param key     grouping key; split on {@code ","} after trimming
         * @param values  log lines sharing that key
         * @param context task context (records go through {@link MultipleOutputs}, not the context)
         * @throws IOException          if a write fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            String[] fields = key.toString().trim().split(",");

            String companyId = FALLBACK_COMPANY_ID;
            String token = FALLBACK_TOKEN;
            String date = FALLBACK_DATE;
            if (fields.length == KEY_FIELD_COUNT) {
                companyId = fields[0];
                token = fields[1];
                date = fields[2];
            }

            for (Text value : values) {
                // Route the record to a key-specific file; the record key is NullWritable,
                // so only the log line itself is handed to the named output.
                outputs.write(NAMED_OUTPUT, NullWritable.get(), value,
                        companyId + "/" + token + "/" + (StringUtil.getUUIDFromToken(token) + "_" + date));
            }
        }

        /**
         * Closes the {@link MultipleOutputs} writer before the default cleanup runs.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            outputs.close();
            super.cleanup(context);
        }
    }
}
package com.**.pcdnas.mr.job;
import com.**.pcdnas.mr.text.MyTextOutputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
/**
 * MRUnit tests for the mapper and reducer in {@link OpenPeerMR}.
 *
 * <p>Created by jiangzhou, 2018/11/9.
 */
public class OpenPeerMRTest {

    /** Sample peer log line (a single JSON document) used as input by every test. */
    private static final String CONTENT = "{\"peerType\":0,\"countryCode\":1,\"provinceCode\":16,\"ispCode\":1,\"ip\":\"61.171.51.200\",\"ipcountry\":\"中国\",\"ipprovince\":\"上海\",\"ipisp\":\"电信\",\"time\":1541523309467,\"date\":\"181107\",\"hour\":0,\"minute\":5509,\"submitTime\":1541523289720,\"token\":\"10100101000100000004016db042cd0e204d028a3ef961ab216d92\",\"deviceId\":\"2D15397E0E2CD4067FF185B5E2459706\",\"osName\":\"p\",\"osVersion\":\"\",\"deviceType\":\"\",\"cpuAbi\":\"\",\"networkType\":\"\",\"appversion\":\"\",\"appName\":\"\",\"peerVersion\":\"2.5.1.0\",\"pcdnType\":\"6\",\"natType\":4,\"queryPeerNum\":0,\"uploadAvgSpeed\":31026,\"uploadMaxSpeed\":0,\"uploadLimitSpeed\":2097152,\"uploadtotalBytes\":597747830,\"uploadtime\":0,\"uploadMaxConnect\":0,\"connPeerNum\":0,\"validconnPeerNum\":0,\"domainName\":\"114.80.186.137\",\"logcode\":\"\",\"repTtfb\":0,\"repTime\":0,\"t4\":0,\"t5\":53977000,\"t6\":0,\"t7\":53977000,\"t8\":299,\"cacheUsedSize\":0,\"cacheRemainSize\":5632,\"cacheSetSize\":5632,\"systemValidSpace\":188468,\"isthirdStorage\":0,\"isdiskcanwrite\":0,\"cpuNum\":0,\"syscpuRate\":0,\"plugcpuRate\":0,\"sysMem\":0,\"restMem\":0,\"plugMem\":0,\"ioread\":0.0,\"iowrite\":0.0,\"await\":0.0,\"svctm\":0.0,\"util\":0.0,\"type\":\"peer\"}";

    /** Map output key the mapper is expected to produce for {@link #CONTENT}. */
    private static final String EXPECTED_KEY =
            "401,10100101000100000004016db042cd0e204d028a3ef961ab216d92,2018-11-07_00";

    private MapDriver<Object, Text, Text, Text> mapDriver;
    private ReduceDriver<Text, Text, Text, Text> reduceDriver;
    private MapReduceDriver<Object, Text, Text, Text, Text, Text> mapReduceDriver;

    /**
     * Builds fresh MRUnit drivers around a new mapper and reducer before each test.
     */
    @Before
    public void setUp() {
        OpenPeerMR.OpenPeerMapper mapper = new OpenPeerMR.OpenPeerMapper();
        OpenPeerMR.OpenPeerReducer reducer = new OpenPeerMR.OpenPeerReducer();
        mapDriver = MapDriver.newMapDriver(mapper);
        reduceDriver = ReduceDriver.newReduceDriver(reducer);
        mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
    }

    /**
     * The mapper must emit the input line unchanged under {@link #EXPECTED_KEY}.
     */
    @Test
    public void OpenPeerMapper() throws IOException {
        mapDriver
                .withInput(new LongWritable(0), new Text(CONTENT))
                .withOutput(new Text(EXPECTED_KEY), new Text(CONTENT))
                .runTest();
    }

    // Reducer-only test, still disabled because it has not been made to pass yet.
    // The expected record key is NullWritable because that is what the reducer writes.
    // NOTE(review): MRUnit's multi-output support appears to rely on PowerMock intercepting
    // `new MultipleOutputs(...)` in the class under test (PowerMockRunner + @PrepareForTest);
    // confirm against the MRUnit documentation before enabling.
    //
    // @Test
    // public void OpenPeerReducer() throws IOException {
    //     reduceDriver
    //             .withInput(new Text(EXPECTED_KEY), Arrays.asList(new Text(CONTENT)))
    //             .withMultiOutput("MOSText", NullWritable.get(), new Text(CONTENT))
    //             .runTest();
    // }

    /**
     * Runs mapper and reducer together and checks the record written to the
     * {@code "MOSText"} named output.
     *
     * <p>Exceptions are deliberately allowed to propagate: swallowing them made this test
     * pass even when the pipeline never produced any output.
     *
     * @throws Exception if the drivers fail to run or the output does not match
     */
    @Test
    public void testMR() throws Exception {
        mapReduceDriver.withInput(new LongWritable(0), new Text(CONTENT));
        // The reducer writes NullWritable as the record key, so that is the expected key here.
        mapReduceDriver.withMultiOutput("MOSText", NullWritable.get(), new Text(CONTENT));
        mapReduceDriver.runTest();
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the pcdnas-mr module: Hadoop MapReduce jobs plus their MRUnit tests. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Inherits from the sibling pcdnas-pom project, resolved through the relative path below. -->
<parent>
<artifactId>pcdnas-pom</artifactId>
<groupId>com.**.pcdnas</groupId>
<version>0.0.1</version>
<relativePath>../pcdnas-pom/pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>com.**.**</groupId>
<artifactId>pcdnas-mr</artifactId>
<properties>
<!-- Single version shared by all three Hadoop artifacts below. -->
<hadoop.version>2.6.1</hadoop.version>
<!-- Java language level; also passed explicitly to maven-compiler-plugin further down. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- No version declared here; presumably managed by the parent pom (confirm). -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<!-- No version declared here; presumably managed by the parent pom (confirm). -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
<!-- unit tests: start -->
<!-- No version declared here; presumably managed by the parent pom (confirm). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<!-- MRUnit, using the artifact published under the hadoop2 classifier. -->
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>1.1.0</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>
<!-- unit tests: end -->
</dependencies>
<build>
<plugins>
<!-- Builds a jar-with-dependencies assembly during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<!-- Manifest entry point; the previous value is kept commented out on the next line. -->
<!--<mainClass>com.**.**.mr.job.Driver</mainClass>-->
<mainClass>com.**.**.mr.job.OpenPeerDriver</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Compiles with the source/target levels defined in the properties section. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
参考:https://blog.youkuaiyun.com/xiaocaidexuexibiji/article/details/17471627 ,但按该文的做法仍不能解决。
测试类中被注释掉的 OpenPeerReducer 用例(ReduceDriver + MultipleOutputs)仍然没有解决,请好心人支援一下!
题外话
在搜索的过程中发现几篇不错的 MapReduce 文章:
使用hadoop multipleOutputs对输出结果进行不一样的组织:
https://www.cnblogs.com/yuhan-TB/p/3705665.html
MR-2.输出格式(OutputFormat)Multiple outputs多目录输出:
https://blog.youkuaiyun.com/shenfuli/article/details/50589339
MapReduce处理输出多文件格式(MultipleOutputs)
https://blog.youkuaiyun.com/u010366796/article/details/44753071
MapReduce编程实战之“调试”和"调优"
https://blog.youkuaiyun.com/puma_dong/article/details/24120045