Hadoop实现协同过滤算法

Step1

/**
 *第一步去重
 * @author Administrator
 *
 */
public class Step1 {
static class Step1_Mapper extends Mapper<LongWritable, Text, Text, NullWritable>{


protected void map(LongWritable key, Text value,
Context context)
throws IOException, InterruptedException {
if(key.get()!=0){
context.write(value,NullWritable.get());
}
}
}
 
static class Step1_Reducer extends Reducer<Text, IntWritable, Text, NullWritable>{


protected void reduce(Text key, Iterable<IntWritable> i,
Context context)
throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
 
 
}


Step2


/**
 * 按用户分组,计算所有物品出现的组合列表,得到用户对物品的喜爱度得分矩阵
 * @author Administrator
 *u13 i160:1,
*u14 i25:1,i223:1,
*u16 i252:1,
*u21 i266:1,
*u24 i64:1,i218:1,i185:1,
*u26 i276:1,i201:1,i348:1,i321:1,i136:1,
 */
public class Step2 {
//按用户分组,计算所有物品出现的组合列表,得到用户对物品的喜爱度得分矩阵
static class Step2_Mapper extends Mapper<LongWritable, Text, Text, Text>{


//如果使用:用户+物品,同时作为输出key,更优
protected void map(LongWritable key, Text value,
Context context)
throws IOException, InterruptedException {
String[] tokens=value.toString().split(",");
String user=tokens[1];
String action =tokens[2];
Text k=new Text(user);
Integer rv=StartRun.R.get(action);
Text v=new Text(item+":"+rv.intValue());
//key:user  v :item:action
context.write(k,v);
}
}
//u1  i1    3
//按照key   user 进行分组
/*
i100:i100 3
i100:i105 1
i100:i106 1
i100:i109 1
*/
 
//按用户分组,计算所有物品出现的组合列表,得到用户对物品的喜爱度得分矩阵
static class Step2_Reducer extends Reducer<Text, Text, Text, Text>{


protected void reduce(Text key, Iterable<Text> i,
Context context)
throws IOException, InterruptedException {
Map<String,String> r=new HashMap<String,Integer>();

for(Text value:i){
String [] vs=value.toString().split(":");
String item=vs[0];
Integer action=Integer.parseInt(vs[1]);
//得到喜爱度
action=(r.get(item)==null?0:r.get(item)).intvalue()+action;
r.put(item,action);
}
//得到同现组合列表
StringBuffer sb=new StringBuffer();
for(Entry<String,Integer> entry:r.entrySet()){
sb.append(entry.getKey()+":"+entry.getValue().intValue()+",");
}
//key?  value item:action,item:action  
//key 是用户
//u1  item:action   ,   item,action  这是物品出现的组合列表  
context.write(key,new Text(sb.toString()));

}
}
}


Step3


/**
 * 对物品组合列表进行计数,建立物品的同现矩阵
 * @author Administrator
 *
 */
public class Step3 {
//i100:i100 3
static class Step3_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{


protected void map(LongWritable key, Text value,
Context context)
throws IOException, InterruptedException {
String[] tokens=value.toString().split("\t");
String[] items=tokens[1].split(",");
for(int i = 0; i < items.length; i++){
String itemA=items[i].split(":")[0];
for(int j = 0; j < items.length; j++){
String itemB=items[j].split(":");
//相同的key在此时会进行分组      
K.set(itemA+":"+itemB);
context.write(K,V);
}
}
}
}
//进行 统计  计算 同现 次数   sum
static class Step3_Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{


protected void reduce(Text key, Iterable<IntWritable> i,
Context context)
throws IOException, InterruptedException {
int sum=0;
for(IntWritable v:i){
sum=sum+v.get();
}
V.set(sum);
context.write(key,V);
}

}
}


Step4


/**
 * 把同现矩阵和得分矩阵相乘
 * @author Administrator
 *
 */
public class Step4 {
static class Step4_Mapper extends Mapper<LongWritable, Text, Text, Text> {

private String flag;//A同现矩阵 or B得分矩阵
//每个maptask,初始化时调用一次
protected void setup(Context context) throws IOException,
InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
flag = split.getPath().getParent().getName();// 判断读的数据集


System.out.println(flag + "**********************");
}

protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {

String [] tokens=Pattern.compile("[\t]").split(value.toString());

if(flag.equals("step3")){//同现矩阵
String [] v1=tokens[0].split(":");
String itemID1=v1[0];
String itemID2 = v1[1];
String num = tokens[1];
Text k=new Text(itemID1);//以前一个物品为key 比如i100
Text v=new Text("A"+itemID2+","+num);//A:i109,1

context.write(k,v);
}
} else if (flag.equals("step2")) {// 用户对物品喜爱得分矩阵
String userID = tokens[0];
for (int i = 1; i < tokens.length; i++) {
String[] vector = tokens[i].split(":");
String itemID = vector[0];// 物品id
String pref = vector[1];// 喜爱分数


Text k = new Text(itemID); // 以物品为key 比如:i100
Text v = new Text("B:" + userID + "," + pref); // B:u401,2


context.write(k, v);
}
}
}

static class Step4_Reducer extends Reducer<Text, Text, Text, Text> {
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// A同现矩阵 or B得分矩阵
//某一个物品,针对它和其他所有物品的同现次数,都在mapA集合中
//和该物品同现的   其他物品同现集合  key中的itemID
Map<String,Integer> mapA=new HashMap<String,Integer>();
//物品的推荐权重    物品对所有用户    可以说是 喜爱指数key中的itemID
Map<String, Integer> mapB = new HashMap<String, Integer>();

for(Text line:values){
String val=line.toString();
if(val.startsWith("A:"){// 表示物品同现数字
//正则表达式
String [] kv=Pattern.compile("[\t,]").split(
val.substring(2));
mapA.put(kv[0], Integer.parseInt(kv[1]));
}else if (val.startsWith("B:")) {
String[] kv = Pattern.compile("[\t,]").split(
val.substring(2));
try {
mapB.put(kv[0], Integer.parseInt(kv[1]));
} catch (Exception e) {
e.printStackTrace();
}
}
}


double result=0;
Iterator<String> iter=mapA.keySet().iterator();
while(iter.hasNext()){
String mapk=iter.next();//itemID
int num = mapA.get(mapk).intValue();
Iterator<String> iterb = mapB.keySet().iterator();
while (iterb.hasNext()) {
String mapkb = iterb.next();// userID
int pref = mapB.get(mapkb).intValue();
result = num * pref;// 矩阵乘法相乘计算


Text k = new Text(mapkb);
Text v = new Text(mapk + "," + result);
context.write(k, v);

//得到的key userid  value  
}
}
}


}
}


Step5


/**
 *把相乘之后的矩阵相加获得结果矩阵
 * @author Administrator
 *
 */
public class Step5 {


static class Step5_Mapper extends Mapper<LongWritable, Text, Text, Text> {


/**
* 原封不动输出
*/
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] tokens = Pattern.compile("[\t,]").split(value.toString());
Text k = new Text(tokens[0]);// 用户为key
Text v = new Text(tokens[1] + "," + tokens[2]);
context.write(k, v);
}
}

static class Step5_Reducer extends Reducer<Text, Text, Text, Text> {
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
Map<String,Double> map=new HashMap<String,Double>();//结果
for(Text line:values){
String[] tokens = line.toString().split(",");
String itemID = tokens[0];
Double score = Double.parseDouble(tokens[1]);

//求和计算
if(map.containsKey(itemID)){
map.put(itemID,map.get(itemID)+score);

} else {
map.put(itemID, score);
}
}

Iterator<String> iter = map.keySet().iterator();
while (iter.hasNext()) {
String itemID = iter.next();
double score = map.get(itemID);
Text v = new Text(itemID + "," + score);
context.write(key, v);
}
}
}
}


Step6


/**
 * 按照推荐得分降序排序,每个用户列出10个推荐物品
 * @author Administrator
 *
 */
public class Step6 {
static class Step6_Mapper extends Mapper<LongWritable, Text, PairWritable, Text> {


protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] tokens = Pattern.compile("[\t,]").split(value.toString());
String u = tokens[0];
String item = tokens[1];
String num = tokens[2];
PairWritable k =new PairWritable();
k.setUid(u);
k.setNum(Double.parseDouble(num));
V.set(item+":"+num);
context.write(k, V);
}
}

static class Step6_Reducer extends Reducer<PairWritable, Text, Text, Text> {
protected void reduce(PairWritable key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
int i=0;
StringBuffer sb =new StringBuffer();
for(Text v :values){
if(i==10)
break;
sb.append(v.toString()+",");
i++;
}
K.set(key.getUid());
V.set(sb.toString());
context.write(K, V);
}

}

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值