package mp05.com;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
public class RecommenderIntro {
//下面是一个基于用户的简单的推荐
//探究用户与用户之间的相似性,简单的说就是你有一个好基友,他喜欢这首歌,那么你喜欢这首歌的可能性很大。
public static void main(String[] args) throws TasteException, Exception {
try {
DataModel model=new FileDataModel(new File("/home/xuyao/mahout/test_data/intro.csv"));
//UserSimilarity封装了用户间相似性的概念
UserSimilarity similarity=new PearsonCorrelationSimilarity(model);
//UserNeighborhood封装了最相似用户组的概念. 2是用户的邻域,指的是最相似的几个用户
UserNeighborhood neighborhood=new NearestNUserNeighborhood(2,similarity,model);
//Recommender推荐引擎
Recommender recommender=new GenericUserBasedRecommender(model,neighborhood,similarity);
List<RecommendedItem> recommendations=recommender.recommend(1,1);
for(RecommendedItem recommendation : recommendations)
System.out.println(recommendation);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
evaluator();
}
//配置并评估一个推荐程序,这里也是基于用户的推荐
public static void evaluator() throws IOException, TasteException{
DataModel model=new FileDataModel(new File("/home/xuyao/mahout/ua.base"));
RecommenderEvaluator evaluator=new AverageAbsoluteDifferenceRecommenderEvaluator();
RecommenderBuilder builder =new RecommenderBuilder() {
public Recommender buildRecommender(DataModel model) throws TasteException {
//PearsonCorrelationSimilarity:相似性度量标准--皮尔逊相关系数
UserSimilarity similarity=new PearsonCorrelationSimilarity(model);
//EuclideanDistanceSimilarity: 相似性度量标准--欧式距离
UserSimilarity similarity_2=new EuclideanDistanceSimilarity(model);
//TanimotoCoefficientSimilarity: 相似性度量标准--谷本系数--完全抛开偏好值
UserSimilarity similarity_3=new TanimotoCoefficientSimilarity(model);
//NearestNUserNeighborhood :固定大小的邻域。。改变这个100可以得到不同的打分,所以这个是可以用来调优的
UserNeighborhood neighborhood=new NearestNUserNeighborhood(100,similarity,model);
//下面是另一个表示邻域的,用的是基于阈值的邻域。。其中0.5为可调优。
UserNeighborhood neighborhood_2=new ThresholdUserNeighborhood(0.5, similarity, model);
return new GenericUserBasedRecommender(model, neighborhood, similarity);
}
};
//0.9指的是训练90%的数据,测试10%的数据。 而1.0指的是输入的数据的比例。 这里表示数据集全部输入,其中90%用来训练,另外10%用来测试。
double socre =evaluator.evaluate(builder, null, model, 0.9, 1.0);
//这个socre表示这个模型的打分,分数越小表示这个模型越好。
System.out.println(socre);
}
//下面是基于物品的推荐,简单的说就是你的电脑有360安全卫士,360杀毒,360浏览器,于是说你比较喜欢360的产品,就给你推荐360WIFI。
public static void evaluator_2() throws IOException{
DataModel model=new FileDataModel(new File("/home/xuyao/mahout/ua.base"));
RecommenderBuilder builder =new RecommenderBuilder() {
public Recommender buildRecommender(DataModel model) throws TasteException {
ItemSimilarity similarity =new PearsonCorrelationSimilarity(model);
return new GenericItemBasedRecommender(model, similarity);
}
};
}
}
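/*
 * Where to get the data files:
 *
 * ua.base: go to http://grouplens.org/datasets/movielens/ and download the
 * MovieLens 100K dataset; unpack the archive and you will find ua.base inside.
 *
 * intro.csv contains the following data:
 *
1,101,5
1,102,3
1,103,2.5
2,101,2
2,102,2.5
2,103,5
2,104,2
3,101,2.5
3,104,4
3,105,4.5
3,107,5
4,101,5
4,103,3
4,104,4.5
4,106,4
5,101,4
5,102,3
5,103,2
5,104,4
5,105,3.5
5,106,4
 */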