MapReduce 代码部分
计算推荐结果
recommend 部分代码
Test.java
package my;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Holds the HDFS input/output locations used by the recommendation steps,
 * keyed by names such as {@code "Step1Input"} or {@code "Step4Output"}.
 */
public class Test {
    /** Step name to HDFS location; populated by the constructor. */
    public Map<String, String> path;

    /** Splits a record on either a comma or a tab. */
    public static final Pattern DELIMITER = Pattern.compile("[,\t]");

    /**
     * Builds the default path table. Every output directory lives directly
     * under the step-1 input directory, and each later step reads the
     * directory an earlier step wrote.
     */
    public Test() {
        final String root = "hdfs://localhost:9000/user/hadoop/recommend";
        final String step1Out = root + "/step1";
        final String step2Out = root + "/step2";
        final String step3Out1 = root + "/step3_1";
        final String step3Out2 = root + "/step3_2";
        final String step4Out = root + "/step4";

        final Map<String, String> locations = new HashMap<String, String>();
        locations.put("Step1Input", root);
        locations.put("Step1Output", step1Out);
        locations.put("Step2Input", step1Out);
        locations.put("Step2Output", step2Out);
        locations.put("Step3Input1", step1Out);
        locations.put("Step3Output1", step3Out1);
        locations.put("Step3Input2", step2Out);
        locations.put("Step3Output2", step3Out2);
        locations.put("Step4Input1", step3Out1);
        locations.put("Step4Input2", step3Out2);
        locations.put("Step4Output", step4Out);
        this.path = locations;
    }

    /**
     * @return the live (mutable) step-name to location map
     */
    public Map<String, String> getPath() {
        return this.path;
    }

    /**
     * Replaces the whole path table.
     *
     * @param path the new step-name to location map
     */
    public void setPath(Map<String, String> path) {
        this.path = path;
    }
}
Cooccurrence.java
package my;
/**
 * One cell of an item co-occurrence table: a pair of item ids together
 * with the count stored for that pair.
 */
public class Cooccurrence {

    /** First item id of the pair. */
    private int itemID1;

    /** Second item id of the pair. */
    private int itemID2;

    /** Count associated with the pair. */
    private int num;

    /**
     * @param itemID1 first item id
     * @param itemID2 second item id
     * @param num     count for the pair
     */
    public Cooccurrence(int itemID1, int itemID2, int num) {
        this.itemID1 = itemID1;
        this.itemID2 = itemID2;
        this.num = num;
    }

    /** @return the first item id */
    public int getItemID1() {
        return this.itemID1;
    }

    /** @param itemID1 new first item id */
    public void setItemID1(int itemID1) {
        this.itemID1 = itemID1;
    }

    /** @return the second item id */
    public int getItemID2() {
        return this.itemID2;
    }

    /** @param itemID2 new second item id */
    public void setItemID2(int itemID2) {
        this.itemID2 = itemID2;
    }

    /** @return the count for the pair */
    public int getNum() {
        return this.num;
    }

    /** @param num new count for the pair */
    public void setNum(int num) {
        this.num = num;
    }
}
MultiTask2.java
package my;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MultiTask2 {
/** Field separator for text records: matches a single tab or a single comma. */
public static final Pattern DELIMITER = Pattern.compile("[\t,]");
/**
 * Step 1 map: turns one delimited record {@code userID, itemID, pref}
 * into the pair {@code (userID, "itemID:pref")}.
 */
public static class Step1Mapper extends Mapper<Object, Text, IntWritable, Text> {
    // Writable instances are reused across calls instead of allocated per record.
    private final static IntWritable userKey = new IntWritable();
    private final static Text itemPref = new Text();

    /**
     * @param key     input key (not used)
     * @param value   one record: userID, itemID and preference separated by tab or comma
     * @param context sink for the emitted (userID, "itemID:pref") pair
     */
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        final String[] fields = DELIMITER.split(value.toString());
        final int user = Integer.parseInt(fields[0]);
        final String item = fields[1];
        final String rating = fields[2];

        // e.g. "133 24 5" becomes key 133, value "24:5"
        userKey.set(user);
        itemPref.set(item + ":" + rating);
        context.write(userKey, itemPref);
    }
}
/**
 * Step 1 reduce: joins all values received for one key into a single
 * comma-separated string, e.g. {@code 133 -> "24:5,57:3,41:4"}.
 */
public static class Step1Reducer extends Reducer<IntWritable, Text, IntWritable, Text>{
    // Reused output value.
    private final static Text joined = new Text();

    /**
     * @param key     the grouping key, written through unchanged
     * @param values  the strings to concatenate
     * @param context sink for the (key, joined values) pair
     */
    public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        final StringBuilder buffer = new StringBuilder();
        boolean first = true;
        for (Text piece : values) {
            // A comma goes before every element except the first one.
            if (first) {
                first = false;
            } else {
                buffer.append(",");
            }
            buffer.append(piece.toString());
        }
        joined.set(buffer.toString());
        context.write(key, joined);
    }
}
/**
 * Step 2 map: for one user-vector line
 * ({@code userID, item:pref, item:pref, ...}) emits {@code ("itemA:itemB", 1)}
 * for every ordered pair of items on the line, including each item paired
 * with itself.
 */
public static class Step2Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static Text k = new Text();
    private final static IntWritable v = new IntWritable(1);

    /**
     * @param key     input key (not used)
     * @param values  one line whose first field is skipped and whose remaining
     *                fields are {@code item:pref} entries
     * @param context sink for the emitted ("itemA:itemB", 1) pairs
     */
    public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
        // Use this class's own delimiter, like the sibling mappers do.
        String[] tokens = DELIMITER.split(values.toString());

        // Extract every item id once up front; the pair loop below would
        // otherwise re-split the same token on each inner iteration.
        // Index 0 (the leading field) is intentionally left unused.
        String[] itemIDs = new String[tokens.length];
        for (int i = 1; i < tokens.length; i++) {
            itemIDs[i] = tokens[i].split(":")[0];
        }

        for (int i = 1; i < itemIDs.length; i++) {
            for (int j = 1; j < itemIDs.length; j++) {
                k.set(itemIDs[i] + ":" + itemIDs[j]);
                context.write(k, v);
            }
        }
    }
}
/**
 * Step 2 reduce: adds up all integer values received for a key and emits
 * the key with that total.
 */
public static class Step2Reducer extends Reducer<Text,IntWritable,Text,IntWritable> {
    // Reused output value.
    private IntWritable total = new IntWritable();

    /**
     * @param key     the grouping key, written through unchanged
     * @param values  the partial counts to add together
     * @param context sink for the (key, total) pair
     */
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int running = 0;
        for (IntWritable partial : values) {
            running = running + partial.get();
        }
        total.set(running);
        context.write(key, total);
    }
}
/**
 * Step 3 (part 1) map: re-keys a line of the form
 * {@code first, item:pref, item:pref, ...} by item, emitting
 * {@code (itemID, "first:pref")} for every {@code item:pref} entry.
 */
public static class Step3Mapper1 extends Mapper<LongWritable, Text, IntWritable, Text> {
    // Reused output key/value.
    private final static IntWritable itemKey = new IntWritable();
    private final static Text ownerPref = new Text();

    /**
     * @param key     input key (not used)
     * @param values  one line: a leading field followed by {@code item:pref} entries
     * @param context sink for the emitted (itemID, "leadingField:pref") pairs
     */
    @Override
    public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
        final String[] fields = DELIMITER.split(values.toString());
        int idx = 1;
        while (idx < fields.length) {
            final String[] entry = fields[idx].split(":");
            final int item = Integer.parseInt(entry[0]);
            final String rating = entry[1];
            itemKey.set(item);
            ownerPref.set(fields[0] + ":" + rating);
            context.write(itemKey, ownerPref);
            idx++;
        }
    }
}
/**
 * Step 3 (part 2) map: parses a two-field line and emits the first field
 * as a text key with the second field as an integer value.
 */
public static class Step3Mapper2 extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused output key/value.
    private final static Text pairKey = new Text();
    private final static IntWritable count = new IntWritable();

    /**
     * @param key     input key (not used)
     * @param values  one line: a key field and an integer field separated by tab or comma
     * @param context sink for the emitted (key field, integer) pair
     */
    @Override
    public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
        final String[] fields = DELIMITER.split(values.toString());
        pairKey.set(fields[0]);
        final int parsed = Integer.parseInt(fields[1]);
        count.set(parsed);
        context.write(pairKey, count);
    }
}
/**
 * Step 4 map: combines co-occurrence records with user-preference records.
 *
 * <p>Two record shapes are recognised, told apart by which field contains a colon:
 * <ul>
 *   <li>{@code itemID1:itemID2 <delim> count}: a co-occurrence cell, cached
 *       in {@link #cooccurrenceMatrix} under {@code itemID1};</li>
 *   <li>{@code itemID <delim> userID:pref}: a user preference, which is
 *       multiplied against every cached cell of {@code itemID} and emitted as
 *       {@code (userID, "itemID2,pref*count")}.</li>
 * </ul>
 *
 * <p>NOTE(review): the cache is only filled by earlier {@code map} calls, so a
 * preference record can only be expanded with the co-occurrence cells that
 * were already seen when it arrives. Confirm that the job's input ordering and
 * splitting guarantee this.
 */
public static class Step4Mapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    private final static IntWritable k = new IntWritable();
    private final static Text v = new Text();
    /** Co-occurrence cells seen so far, grouped by their first item id. */
    private final static Map<Integer, List<Cooccurrence>> cooccurrenceMatrix = new HashMap<Integer, List<Cooccurrence>>();

    /**
     * @param key     input key (not used)
     * @param values  one co-occurrence or user-preference record (see class comment)
     * @param context sink for the emitted (userID, "itemID,partialScore") pairs
     */
    public void map(LongWritable key, Text values,Context context) throws IOException, InterruptedException {
        String[] tokens = DELIMITER.split(values.toString());
        String[] v1 = tokens[0].split(":");
        String[] v2 = tokens[1].split(":");

        if (v1.length > 1) {// cooccurrence
            int itemID1 = Integer.parseInt(v1[0]);
            int itemID2 = Integer.parseInt(v1[1]);
            int num = Integer.parseInt(tokens[1]);

            List<Cooccurrence> row = cooccurrenceMatrix.get(itemID1);
            if (row == null) {
                row = new ArrayList<Cooccurrence>();
                cooccurrenceMatrix.put(itemID1, row);
            }
            row.add(new Cooccurrence(itemID1, itemID2, num));
        }

        if (v2.length > 1) {// userVector
            int itemID = Integer.parseInt(tokens[0]);
            int userID = Integer.parseInt(v2[0]);
            double pref = Double.parseDouble(v2[1]);

            List<Cooccurrence> row = cooccurrenceMatrix.get(itemID);
            if (row == null) {
                // No cells cached for this item yet: previously a
                // NullPointerException. Skip the record, but count it so the
                // loss shows up in the job counters instead of being silent.
                context.getCounter("Step4Mapper", "MissingCooccurrenceRow").increment(1);
                return;
            }

            k.set(userID);
            for (Cooccurrence co : row) {
                v.set(co.getItemID2() + "," + pref * co.getNum());
                context.write(k, v);
            }
        }
    }
}
/**
 * Step 4 reduce: for one key, sums the numeric part of every
 * {@code "itemID,score"} value per item id and emits one
 * {@code "itemID,total"} value per distinct item id.
 */
public static class Step4Reducer extends Reducer<IntWritable, Text, IntWritable, Text> {
    // Reused output value.
    private Text line = new Text();

    /**
     * @param key     the grouping key, written through unchanged
     * @param values  strings of the form {@code "itemID,score"}
     * @param context sink for the (key, "itemID,total") pairs
     */
    public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        final Map<String, Double> totals = new HashMap<String, Double>();

        for (Text raw : values) {
            final String[] parts = raw.toString().split(",");
            final String item = parts[0];
            final Double soFar = totals.get(item);
            final double addend = Double.parseDouble(parts[1]);
            if (soFar == null) {
                totals.put(item, addend);
            } else {
                totals.put(item, soFar + addend);
            }
        }

        for (Map.Entry<String, Double> entry : totals.entrySet()) {
            final double score = entry.getValue();
            line.set(entry.getKey() + "," + score);
            context.write(key, line);
        }
    }
}
public static void main(String[] args)throws Exception {
Test test=new Test();
String input1 = test.getPath().get("Step1Input");
String output1 = test.getPath().get("Step1Output");
Configuration conf = new Configuration();
Job job1 = new Job(conf, "job1");
job1.setJarByClass(MultiTask2.class);
job1.setMapperClass(Step1Mapper.class);
job1.setCombinerClass(Step1Reducer.class);
job1.setReducerClass(Step1Reducer.class);
job1.setOutputKeyClass(IntWritable.class);
job1.setOutpu