转载地址:http://blog.youkuaiyun.com/xuedingkai/article/details/78997928
输入:邻接表
- 100, 200 300 400 500 600
- 200, 100 300 400
- 300, 100 200 400 500
- 400, 100 200 300
- 500, 100 300
- 600, 100
需求:查找两两用户的共同好友。
思路:1、key为两两用户,value为其中一个用户的所有好友
2、求两个用户所有好友的交集
步骤:1、map:取每一行,将user与其每一个好友分别组合为key(key中的两个字段按字典序排列),user的所有好友为value
2、reduce:求两个用户之间好友的交集
- package dabook;
- import hadoop.FriendRecom;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Set;
- import java.util.TreeSet;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- import org.apache.hadoop.mapreduce.Reducer;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- import org.apache.hadoop.util.GenericOptionsParser;
- public class CommFriend {
- public static Set<String> intersect(Set<String> set1, Set<String> set2){
- if(set1==null || set2 == null){
- return null;
- }
- Set<String> result = new TreeSet<String>();
- Set<String> small = null;
- Set<String> big = null;
- if(set1.size() < set2.size()){
- small = set1;
- big = set2;
- }
- else {
- small = set2;
- big = set1;
- }
- for (String String : small) {
- if(big.contains(String)){
- result.add(String);
- }
- }
- return result;
- }
- static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
- private static Text outKey = new Text();
- private static Text outValue = new Text();
- @Override
- protected void map(LongWritable key, Text value, Context context)
- throws IOException, InterruptedException {
- String [] input = value.toString().split(",");
- if(input.length != 2){
- return;
- }
- outValue.set(input[1]);
- String [] sz = input[1].split(" ");
- for (String string : sz) {
- if(input[0].compareTo(string) < 0){
- outKey.set("[" + input[0] + ", " + string + "]");
- }
- else {
- outKey.set("[" + string + ", " + input[0] + "]");
- }
- context.write(outKey, outValue);
- }
- }
- }
- static class MyReducer extends Reducer<Text, Text, Text, Text>{
- private Text outKey = new Text();
- private Text outValue = new Text();
- @Override
- protected void reduce(Text key, Iterable<Text> value, Context context)
- throws IOException, InterruptedException {
- int len = 0;
- Set<String> set1 = new TreeSet<String>();
- Set<String> set2 = new TreeSet<String>();
- ArrayList<String> arrayList = new ArrayList<String>();
- for (Text text : value) {
- arrayList.add(text.toString());
- len++;
- }
- if(len != 2){
- return;
- }
- String [] sz = arrayList.get(0).split(" ");
- for (String s : sz) {
- set1.add(s);
- }
- sz = arrayList.get(1).trim().split(" ");
- for (String s : sz) {
- set2.add(s);
- }
- Set<String> res = intersect(set1, set2);
- if(res == null){
- return;
- }
- StringBuilder sb = new StringBuilder();
- for (String s : res) {
- sb.append(s + ", ");
- }
- String substring = null;
- if(sb.length() > 1){
- substring = sb.substring(0, sb.length()-2);
- }
- if(substring != null){
- this.outValue.set(substring);
- context.write(key, outValue);
- }
- }
- }
- private static String inputPath = "dabook/commfriend";
- private static String outputPath = "dabook/commfriend-out";
- public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
- Configuration conf = new Configuration();
- String[] otherArgs = new GenericOptionsParser(conf, args)
- .getRemainingArgs();
- Job job = new Job(conf, "common friend");
- job.setJarByClass(CommFriend.class);
- job.setMapperClass(MyMapper.class);
- job.setReducerClass(MyReducer.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Text.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Text.class);
- FileSystem fs = FileSystem.get(conf);
- Path inPath = new Path(inputPath);
- if (fs.exists(inPath)) {
- FileInputFormat.addInputPath(job, inPath);
- }
- Path outPath = new Path(outputPath);
- fs.delete(outPath, true);
- FileOutputFormat.setOutputPath(job, outPath);
- System.exit(job.waitForCompletion(true) ? 0 : 1);
- }
- private void test(){
- Set<String> set1 = new TreeSet<String>();
- Set<String> set2 = new TreeSet<String>();
- set2.add("2");
- set2.add("3");
- set2.add("4");
- set2.add("5");
- set1.add("3");
- set1.add("4");
- set1.add("6");
- Set<String> res = intersect(set1, set2);
- for (String string : res) {
- System.out.println(string);
- }
- }
- }
输出:
- [100, 200] 300, 400
- [100, 300] 200, 400, 500
- [100, 400] 200, 300
- [100, 500] 300
- [200, 300] 100, 400
- [200, 400] 100, 300
- [300, 400] 100, 200
- [300, 500] 100