小菜鸟注册优快云账号好久了,最近才决定开始记录自己的学习经历。哈哈^_^
前几天学习了基于map-reduce的物品推荐算法的实现,写下来和大家分享。
首先,整段代码可分为5步:
step1.根据用户行为列表构建评分矩阵。
step2.利用评分矩阵,构建物品与物品的相似度矩阵。(在这里利用余弦相似度计算物品与物品的相似度矩阵)
多维向量的余弦相似度:
a(A1,A2,A3.....An)
b(B1,B2,B3,....Bn)
cos(a,b) = (A1B1 + A2B2 + ... + AnBn) / ( sqrt(A1^2 + A2^2 + ... + An^2) * sqrt(B1^2 + B2^2 + ... + Bn^2) )
step3.将评分矩阵转置。
step4.物品与物品相似度矩阵*评分矩阵(经过步骤3转置)。
step5.根据评分矩阵,将步骤4的输出中,用户已经有过行为的商品评分置0 。
接下来,我们看看具体实现。
step1.根据用户行为列表构建评分矩阵。
输入矩阵每列代表:用户ID;物品ID;分值 输入文件路径为: /ItemCF/step1_input/ActionList.txt
eg:输入矩阵:
A,1,1
C,3,5
B,2,3
B,5,3
B,6,5
A,2,10
C,3,10
C,4,5
C,1,5
A,1,1
A,6,5
A,4,3
<---------------step1的map阶段----------------->
package step1; import java.io.IOException; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Mapper.Context; public class Mapper1 extends Mapper<LongWritable,Text,Text,Text>{ private Text outKey = new Text(); private Text outValue = new Text(); protected void map(LongWritable key,Text value,Mapper<LongWritable,Text,Text,Text>.Context context) throws IOException, InterruptedException{ try{ String[] values = value.toString().split(","); String userID = values[0]; String itemID = values[1]; String score = values[2]; outKey.set(itemID); outValue.set(userID+"_"+score); context.write(outKey, outValue); }catch(Exception e){ e.printStackTrace(); } } }
<---------------step1的reduce阶段----------------->
package step1; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.Reducer.Context; public class Reducer1 extends Reducer<Text,Text,Text,Text> { private Text outKey = new Text(); private Text outValue = new Text(); protected void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{ try{ String itemID = key.toString(); //<userID,score> Map<String,Integer> map = new HashMap<String,Integer>(); for(Text value: values){ String userID = value.toString().split("_")[0]; String score = value.toString().split("_")[1]; if(map.get(userID) == null){ map.put(userID, Integer.valueOf(score)); }else{ Integer preScore = map.get(userID); map.put(userID, preScore+Integer.valueOf(score)); } } StringBuilder sBuilder = new StringBuilder(); for(Map.Entry<String, Integer> entry:map.entrySet()){ String userID = entry.getKey(); String score = String.valueOf(entry.getValue()); sBuilder.append(userID + "_" + score + ","); } String line = null; if(sBuilder.toString().endsWith(",")){ line = sBuilder.substring(0,sBuilder.length()-1); } outKey.set(itemID); outValue.set(line); context.write(outKey, outValue); }catch(Exception e){ e.printStackTrace(); } } }
<---------------step1的主函数----------------->
package step1;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 1 driver: builds the item-by-user rating matrix from the raw action
 * list. Reads {@code /ItemCF/step1_input/ActionList.txt} and writes to
 * {@code /ItemCF/step1_output/}.
 */
public class MR1 {

    private static final String INPUT_PATH = "/ItemCF/step1_input/ActionList.txt";
    private static final String OUTPUT_PATH = "/ItemCF/step1_output/";
    private static final String HDFS_PATH = "hdfs://hadoop:9000";

    /**
     * Configures and runs the step-1 MapReduce job.
     *
     * @return 1 on success, -1 on failure (job failed or an exception occurred)
     */
    public int run() {
        try {
            // Job configuration pointing at the cluster's HDFS.
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", HDFS_PATH);

            Job job = Job.getInstance(conf, "step1");
            job.setJarByClass(MR1.class);
            job.setMapperClass(Mapper1.class);
            job.setReducerClass(Reducer1.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inPath = new Path(INPUT_PATH);
            if (fs.exists(inPath)) {
                FileInputFormat.addInputPath(job, inPath);
            }
            // Delete stale output so the job does not abort on an existing directory.
            Path outPath = new Path(OUTPUT_PATH);
            fs.delete(outPath, true);
            FileOutputFormat.setOutputPath(job, outPath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (IOException | ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        // BUG FIX: the original fell through to `return 1` here, so main()
        // printed "success" even when the job threw an exception.
        return -1;
    }

    public static void main(String[] args) {
        int result = new MR1().run();
        System.out.println("result0 = " + result);
        if (result == 1) {
            System.out.println("result1 = " + result);
            System.out.println("success");
        } else if (result == -1) {
            System.out.println("result2 = " + result);
            System.out.println("defeat");
        }
    }
}
经step1阶段输出结果为:
输出矩阵每列代表: 物品ID(行);用户ID(列);分值 输出路径为 /ItemCF/step1_output/。
输出结果:
1 A_2,C_5
&nb