数据:
前面是用户,后面是用户有哪些好友
A:B,D,E,H,I,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:B,C,D,E,O,M
G:Q,W,A,C,E,O
H:A,C,E,D,O
I:A,O
J:B,P
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
分析:
以第一行数据为例,先求出哪些人的好友列表里有A
比如B和D的好友列表里都有A,所以A是B和D的共同好友
所以分两步进行:
第一步:找出A在哪些人的好友列表里
第二步:将拥有共同好友A的这些人排序后两两组合,即可得到他们的共同好友A
package friendTest;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for stage one of the common-friends job: inverts each
 * "person:friends" line into (friend, person) pairs and groups them,
 * producing, per friend, the list of people who have that friend.
 *
 * args[0] = input path, args[1] = output path (must not exist).
 */
public class FriendsDriver {
	public static void main(String[] args) throws Exception {
		// Build the job from a default (classpath-driven) configuration.
		Job stageOne = Job.getInstance(new Configuration());
		stageOne.setJarByClass(FriendsDriver.class);

		// Wire up the map and reduce implementations.
		stageOne.setMapperClass(FriendsMapper.class);
		stageOne.setReducerClass(FriendsReducer.class);

		// Both phases emit Text keys and Text values.
		stageOne.setMapOutputKeyClass(Text.class);
		stageOne.setMapOutputValueClass(Text.class);
		stageOne.setOutputKeyClass(Text.class);
		stageOne.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(stageOne, new Path(args[0]));
		FileOutputFormat.setOutputPath(stageOne, new Path(args[1]));

		// Block until the job finishes and report success/failure.
		System.out.println(stageOne.waitForCompletion(true));
	}
}
/**
 * Stage-one mapper. Input line format: "person:friend1,friend2,...".
 * Emits one (friend, person) pair per listed friend, so the reducer
 * receives, per friend, every person whose list contains that friend.
 */
class FriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across records to avoid allocating two Text objects per pair
	// (standard Hadoop practice for hot map() loops).
	private final Text outKey = new Text();
	private final Text outValue = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// limit=2: only the first ':' separates person from friend list.
		String[] parts = value.toString().split(":", 2);
		if (parts.length < 2 || parts[0].isEmpty()) {
			return; // skip blank/malformed lines instead of throwing ArrayIndexOutOfBoundsException
		}
		outValue.set(parts[0]);
		for (String friend : parts[1].split(",")) {
			if (friend.isEmpty()) {
				continue; // tolerate stray commas such as "A:B,,C"
			}
			outKey.set(friend);
			context.write(outKey, outValue);
		}
	}
}
//B A
//C A
//D A
//B C
/**
 * Stage-one reducer. For one friend (the key), joins every person who
 * listed that friend into a comma-separated value, e.g. A -> "B,C,D".
 */
class FriendsReducer extends Reducer<Text, Text, Text, Text> {

	// Reused output holder; set() replaces its contents each call.
	private final Text outValue = new Text();

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder: single-threaded here, so the synchronized
		// StringBuffer was pure overhead. Prepending the separator
		// also avoids the dangling trailing comma the old code emitted.
		StringBuilder joined = new StringBuilder();
		for (Text person : values) {
			if (joined.length() > 0) {
				joined.append(',');
			}
			joined.append(person);
		}
		outValue.set(joined.toString());
		// The framework does not mutate the key after write; the extra
		// new Text(key) copy was unnecessary.
		context.write(key, outValue);
	}
}
第一次处理结果是A—>D,O,K,I,H,G,B,C,
可以看到value里这些人的好友列表中都有A,那么他们的共同好友就是A,所以要将每一行进行处理
package friendTest;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for stage two of the common-friends job: reads stage one's
 * "friend\tperson1,person2,..." output and, for every pair of people
 * sharing that friend, emits the pair with their common friend.
 *
 * args[0] = stage-one output path, args[1] = final output path (must not exist).
 */
public class FriendsDriver2 {
	public static void main(String[] args) throws Exception {
		// Build the job from a default (classpath-driven) configuration.
		Job stageTwo = Job.getInstance(new Configuration());
		stageTwo.setJarByClass(FriendsDriver2.class);

		// Wire up the map and reduce implementations.
		stageTwo.setMapperClass(FriendsMapper2.class);
		stageTwo.setReducerClass(FriendsReducer2.class);

		// Both phases emit Text keys and Text values.
		stageTwo.setMapOutputKeyClass(Text.class);
		stageTwo.setMapOutputValueClass(Text.class);
		stageTwo.setOutputKeyClass(Text.class);
		stageTwo.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(stageTwo, new Path(args[0]));
		FileOutputFormat.setOutputPath(stageTwo, new Path(args[1]));

		// Block until the job finishes and report success/failure.
		System.out.println(stageTwo.waitForCompletion(true));
	}
}
/**
 * Stage-two mapper. Input line (stage-one output, tab-separated):
 * "friend\tperson1,person2,...". Emits one ("pi-pj-->", friend) pair
 * for every unordered pair of people sharing that friend.
 */
class FriendsMapper2 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across records to avoid per-pair Text allocation in the double loop.
	private final Text pairKey = new Text();
	private final Text commonFriend = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] parts = value.toString().split("\t");
		if (parts.length < 2) {
			return; // skip blank/malformed lines instead of throwing ArrayIndexOutOfBoundsException
		}
		String[] people = parts[1].split(",");
		// Sort so every pair is always emitted in the same order,
		// preventing duplicate keys such as A-B and B-A.
		Arrays.sort(people);
		commonFriend.set(parts[0]);
		for (int i = 0; i < people.length; i++) {
			for (int j = i + 1; j < people.length; j++) {
				pairKey.set(people[i] + "-" + people[j] + "-->");
				context.write(pairKey, commonFriend);
			}
		}
	}
}
/**
 * Stage-two reducer. For one pair key ("pi-pj-->"), joins all of that
 * pair's common friends into a comma-separated value,
 * e.g. "A-B-->" -> "C,E".
 */
class FriendsReducer2 extends Reducer<Text, Text, Text, Text> {

	// Reused output holder; set() replaces its contents each call.
	private final Text outValue = new Text();

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder: single-threaded here, so the synchronized
		// StringBuffer was pure overhead. Prepending the separator
		// also avoids the dangling trailing comma the old code emitted.
		StringBuilder joined = new StringBuilder();
		for (Text friend : values) {
			if (joined.length() > 0) {
				joined.append(',');
			}
			joined.append(friend);
		}
		outValue.set(joined.toString());
		context.write(key, outValue);
	}
}
这样就可以得出A-B-->E,F这样"两人-->共同好友列表"的形式
pom.xml文件
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.qianfeng</groupId>
	<artifactId>Friends</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<dependencies>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<!-- tools.jar from the local JDK; declared once (the duplicate
		     jdk.tools entry triggered Maven's duplicate-dependency warning). -->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<!-- Compile for Java 8 source/target. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>