数据:
前面是用户,后面是用户有哪些好友
A:B,D,E,H,I,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:B,C,D,E,O,M
G:Q,W,A,C,E,O
H:A,C,E,D,O
I:A,O
J:B,P
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
分析:
以第一行数据为例,先求出哪些人的好友列表里有A
比如B和D的好友列表里都有A,所以A是B和D的共同好友
所以分两步进行:
第一步:找出A在哪些人的好友列表里
第二步:将拥有共同好友A的这些人排序后两两组合,即可得到他们的共同好友A
package friendTest;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for stage one of the common-friends job: inverts each
 * "person:friends" line into (friend, person) pairs and groups them,
 * producing, per friend, the list of people who have that friend.
 *
 * args[0] = input path, args[1] = output path (must not exist).
 */
public class FriendsDriver {
	public static void main(String[] args) throws Exception {
		// Build the job from a default (classpath-driven) configuration.
		Job stageOne = Job.getInstance(new Configuration());
		stageOne.setJarByClass(FriendsDriver.class);

		// Wire up the map and reduce implementations.
		stageOne.setMapperClass(FriendsMapper.class);
		stageOne.setReducerClass(FriendsReducer.class);

		// Both phases emit Text keys and Text values.
		stageOne.setMapOutputKeyClass(Text.class);
		stageOne.setMapOutputValueClass(Text.class);
		stageOne.setOutputKeyClass(Text.class);
		stageOne.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(stageOne, new Path(args[0]));
		FileOutputFormat.setOutputPath(stageOne, new Path(args[1]));

		// Block until the job finishes and report success/failure.
		System.out.println(stageOne.waitForCompletion(true));
	}
}
/**
 * Stage-one mapper. Input line format: "person:friend1,friend2,...".
 * Emits one (friend, person) pair per listed friend, so the reducer
 * receives, per friend, every person whose list contains that friend.
 */
class FriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across records to avoid allocating two Text objects per pair
	// (standard Hadoop practice for hot map() loops).
	private final Text outKey = new Text();
	private final Text outValue = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// limit=2: only the first ':' separates person from friend list.
		String[] parts = value.toString().split(":", 2);
		if (parts.length < 2 || parts[0].isEmpty()) {
			return; // skip blank/malformed lines instead of throwing ArrayIndexOutOfBoundsException
		}
		outValue.set(parts[0]);
		for (String friend : parts[1].split(",")) {
			if (friend.isEmpty()) {
				continue; // tolerate stray commas such as "A:B,,C"
			}
			outKey.set(friend);
			context.write(outKey, outValue);
		}
	}
}
//B A
//C A
//D A
//B C
/**
 * Stage-one reducer. For one friend (the key), joins every person who
 * listed that friend into a comma-separated value, e.g. A -> "B,C,D".
 */
class FriendsReducer extends Reducer<Text, Text, Text, Text> {

	// Reused output holder; set() replaces its contents each call.
	private final Text outValue = new Text();

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder: single-threaded here, so the synchronized
		// StringBuffer was pure overhead. Prepending the separator
		// also avoids the dangling trailing comma the old code emitted.
		StringBuilder joined = new StringBuilder();
		for (Text person : values) {
			if (joined.length() > 0) {
				joined.append(',');
			}
			joined.append(person);
		}
		outValue.set(joined.toString());
		// The framework does not mutate the key after write; the extra
		// new Text(key) copy was unnecessary.
		context.write(key, outValue);
	}
}
第一次处理结果是A—>D,O,K,I,H,G,B,C,
可以看到value里这些人的好友列表中都有A,那么他们的共同好友就是A,所以要将每一行进行处理
package friendTest;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for stage two of the common-friends job: reads stage one's
 * "friend\tperson1,person2,..." output and, for every pair of people
 * sharing that friend, emits the pair with their common friend.
 *
 * args[0] = stage-one output path, args[1] = final output path (must not exist).
 */
public class FriendsDriver2 {
	public static void main(String[] args) throws Exception {
		// Build the job from a default (classpath-driven) configuration.
		Job stageTwo = Job.getInstance(new Configuration());
		stageTwo.setJarByClass(FriendsDriver2.class);

		// Wire up the map and reduce implementations.
		stageTwo.setMapperClass(FriendsMapper2.class);
		stageTwo.setReducerClass(FriendsReducer2.class);

		// Both phases emit Text keys and Text values.
		stageTwo.setMapOutputKeyClass(Text.class);
		stageTwo.setMapOutputValueClass(Text.class);
		stageTwo.setOutputKeyClass(Text.class);
		stageTwo.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(stageTwo, new Path(args[0]));
		FileOutputFormat.setOutputPath(stageTwo, new Path(args[1]));

		// Block until the job finishes and report success/failure.
		System.out.println(stageTwo.waitForCompletion(true));
	}
}
/**
 * Stage-two mapper. Input line (stage-one output, tab-separated):
 * "friend\tperson1,person2,...". Emits one ("pi-pj-->", friend) pair
 * for every unordered pair of people sharing that friend.
 */
class FriendsMapper2 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across records to avoid per-pair Text allocation in the double loop.
	private final Text pairKey = new Text();
	private final Text commonFriend = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] parts = value.toString().split("\t");
		if (parts.length < 2) {
			return; // skip blank/malformed lines instead of throwing ArrayIndexOutOfBoundsException
		}
		String[] people = parts[1].split(",");
		// Sort so every pair is always emitted in the same order,
		// preventing duplicate keys such as A-B and B-A.
		Arrays.sort(people);
		commonFriend.set(parts[0]);
		for (int i = 0; i < people.length; i++) {
			for (int j = i + 1; j < people.length; j++) {
				pairKey.set(people[i] + "-" + people[j] + "-->");
				context.write(pairKey, commonFriend);
			}
		}
	}
}
/**
 * Stage-two reducer. For one pair key ("pi-pj-->"), joins all of that
 * pair's common friends into a comma-separated value,
 * e.g. "A-B-->" -> "C,E".
 */
class FriendsReducer2 extends Reducer<Text, Text, Text, Text> {

	// Reused output holder; set() replaces its contents each call.
	private final Text outValue = new Text();

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder: single-threaded here, so the synchronized
		// StringBuffer was pure overhead. Prepending the separator
		// also avoids the dangling trailing comma the old code emitted.
		StringBuilder joined = new StringBuilder();
		for (Text friend : values) {
			if (joined.length() > 0) {
				joined.append(',');
			}
			joined.append(friend);
		}
		outValue.set(joined.toString());
		context.write(key, outValue);
	}
}
这样就可以得出A-B-->E,F这样"两人-->共同好友列表"的形式
pom.xml文件
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.qianfeng</groupId>
	<artifactId>Friends</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<dependencies>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<!-- tools.jar from the local JDK; declared once (the duplicate
		     jdk.tools entry triggered Maven's duplicate-dependency warning). -->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<!-- Compile for Java 8 source/target. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>