推荐系统的评估
使用平均绝对值差(MAE,Mean Absolute Error)和均方根误差(RMSE,Root Mean Square Error)两个指标来评估推荐系统。
import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.RMSRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;
public class EvaluatorIntro {
private EvaluatorIntro(){
}
//因为使用重复的随机数据来进行测试去看两个指标的结果,保证每次随机都一样,只用于测试,在开发生产中不使用
RandomUtils.useTestSeed();
DataModel model = new FileDataModel(new File("/root/data/*.base"));
//创建评估器利用平均绝对值差(MAE:mean average error)来评分
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
//创建评估器利用均方根(RMS:root mean square数据的平方的平均值的平方根)来评分
RecommenderEvaluator recommenderEvaluator = new RMSRecommenderEvaluator();
//RecommenderBuilder是一个辅助类,帮助我们实现根据数据模型实现推荐系统的评估
RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
@Override
public Recommender buildRecommender(DataModel model) throws TasteException {
UserSimilarity similarity = new PearsonCorrelationSimilarity(model);//相似度计算方式
UserNeighborhood neighborhood = new NearestNUserNeighborhood(100,similarity,model);
return new GenericUserBasedRecommender(model,neighborhood,similarity);
}
};
double maeScore = evaluator.evaluate(recommenderBuilder,null,model,0.7,1.0);
double rmsScore = recommenderEvaluator.evaluate(recommenderBuilder,null,model,0.7,1.0);
//这个方法解释:evaluate(RecommenderBuilder recommenderBuilder,DataModelBuilder dataModelBuilder, DataModel dataModel, double trainingPercentage, double evaluationPercentage)Evaluates the quality of a Recommender's recommendations.
//如果数据比较复杂就用dataModelBuilder,常规的数据就用DataModel把dataModelBuilder参数为null,参数0.7表示是训练70%的数据,测试30%的数据,1.0是指使用数据的比例。
System.out.println(maeScore );//输出这个模型的打分,分数越接近0表示这个模型越好
System.out.println(rmsScore );//分数越接近0表示这个模型越好
}
}
输出结果:
可以更换不同的相似度计算方法来测试一下。
举两个例子,如使用 UncenteredCosineSimilarity(非中心化的余弦相似度):
输出结果:
如:CityBlockSimilarity(曼哈顿距离相似度)
输出结果:
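均方根(RMS)公式:误差平方的平均值的平方根,即 $\mathrm{RMS} = \sqrt{\dfrac{1}{n}\sum_{i=1}^{n}\left(p_i - r_i\right)^2}$,其中 $p_i$ 为预测评分,$r_i$ 为实际评分,$n$ 为参与评估的评分个数。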
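平均绝对值差(MAE)公式:误差绝对值的平均值,即 $\mathrm{MAE} = \dfrac{1}{n}\sum_{i=1}^{n}\left|p_i - r_i\right|$,其中 $p_i$ 为预测评分,$r_i$ 为实际评分,$n$ 为参与评估的评分个数。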