Slope One Predictors for Online Rating-Based Collaborative Filtering
http://www.daniel-lemire.com/fr/abstracts/SDM2005.html
Abstract:
Rating-based collaborative filtering is the process of predicting how a user would rate a given item from other user ratings. We propose three related slope one schemes with predictors of the form f(x) = x + b, which precompute the average difference between the ratings of one item and another for users who rated both. Slope one algorithms are easy to implement, efficient to query, reasonably accurate, and they support both online queries and dynamic updates, which makes them good candidates for real-world systems. The basic slope one scheme is suggested as a new reference scheme for collaborative filtering. By factoring in items that a user liked separately from items that a user disliked, we achieve results competitive with slower memory-based schemes over the standard benchmark EachMovie and Movielens data sets while better fulfilling the desiderata of CF applications.
基于评分的在线协同过滤查询是由用户通过对商品进行评分得到的<Item , rating>而组成。查询响应是由那些用户没有评分的商品的预测值组成的<Item, rating>。Slope One 算法试图同时满足这样的5个目标:
1. 易于实现和维护:普通工程师可以轻松解释所有的聚合数据,并且算法易于实现和测试。
2. 运行时可更新的:新增一个评分项,应该对预测结果即时产生影响。
3. 高效率的查询响应:快速的执行查询,可能需要付出更多的空间占用作为代价。
4. 对初次访问者要求少:对于一个评分项目很少的用户,也应该可以获得有效的推荐。
5. 合理的准确性:与最准确的方法相比,此方法应该是有竞争力的,准确性方面的微小增长不能以简单性和扩展性的大量牺牲为代价。
Slope One算法例子:

用户A对商品I的评分为1,对商品J的评分是1.5;用户B对商品I的评分是2,对商品J的评分未知。现在需要预测这个分值,采用Slope One算法:2 + (1.5 - 1) = 2.5。
加权算法:

首先计算item1和item2的平均差值,((5-3)+(3-4))/2=0.5,还有item1和item3的平均差值,就是5-2=3,然后推算lucy对item1的评分,根据item1和item2的平均差值来看lucy对item1的评分可能为2+0.5=2.5,同理根据item1和item3的平均差值lucy对item1的评分可能为5+3=8。
现在如何取舍呢?使用加权平均数应该是一种比较好的方法(因为2.5是根据两个值推算的,8是通过一个值推算的):(2×2.5 + 1×8)/(2+1) = 13/3 ≈ 4.33。
java实现
import java.util.*;
public class SlopeOne {
public static void main(String args[]){
// this is my data base
Map<UserId,Map<ItemId,Float>> data = new HashMap<UserId,Map<ItemId,Float>>();
// items
ItemId item1 = new ItemId(" candy");
ItemId item2 = new ItemId(" dog");
ItemId item3 = new ItemId(" cat");
ItemId item4 = new ItemId(" war");
ItemId item5 = new ItemId("strange food");
mAllItems = new ItemId[]{item1, item2, item3, item4, item5};
//I'm going to fill it in
HashMap<ItemId,Float> user1 = new HashMap<ItemId,Float>();
HashMap<ItemId,Float> user2 = new HashMap<ItemId,Float>();
HashMap<ItemId,Float> user3 = new HashMap<ItemId,Float>();
HashMap<ItemId,Float> user4 = new HashMap<ItemId,Float>();
user1.put(item1,1.0f);
user1.put(item2,0.5f);
user1.put(item4,0.1f);
data.put(new UserId("Bob"),user1);
user2.put(item1,1.0f);
user2.put(item3,0.5f);
user2.put(item4,0.2f);
data.put(new UserId("Jane"),user2);
user3.put(item1,0.9f);
user3.put(item2,0.4f);
user3.put(item3,0.5f);
user3.put(item4,0.1f);
data.put(new UserId("Jo"),user3);
user4.put(item1,0.1f);
//user4.put(item2,0.4f);
//user4.put(item3,0.5f);
user4.put(item4,1.0f);
user4.put(item5,0.4f);
data.put(new UserId("StrangeJo"),user4);
// next, I create my predictor engine
SlopeOne so = new SlopeOne(data);
System.out.println("Here's the data I have accumulated...");
so.printData();
// then, I'm going to test it out...
HashMap<ItemId,Float> user = new HashMap<ItemId,Float>();
System.out.println("Ok, now we predict...");
user.put(item5,0.4f);
System.out.println("Inputting...");
SlopeOne.print(user);
System.out.println("Getting...");
SlopeOne.print(so.predict(user));
//
user.put(item4,0.2f);
System.out.println("Inputting...");
SlopeOne.print(user);
System.out.println("Getting...");
SlopeOne.print(so.predict(user));
}
Map<UserId,Map<ItemId,Float>> mData;
Map<ItemId,Map<ItemId,Float>> mDiffMatrix;
Map<ItemId,Map<ItemId,Integer>> mFreqMatrix;
static ItemId[] mAllItems;
public SlopeOne(Map<UserId,Map<ItemId,Float>> data) {
mData = data;
buildDiffMatrix();
}
public Map<ItemId,Float> predict(Map<ItemId,Float> user) {
HashMap<ItemId,Float> predictions = new HashMap<ItemId,Float>();
HashMap<ItemId,Integer> frequencies = new HashMap<ItemId,Integer>();
for (ItemId j : mDiffMatrix.keySet()) {
frequencies.put(j,0);
predictions.put(j,0.0f);
}
for (ItemId j : user.keySet()) {
for (ItemId k : mDiffMatrix.keySet()) {
try {
float newval = ( mDiffMatrix.get(k).get(j).floatValue() user.get(j).floatValue() ) * mFreqMatrix.get(k).get(j).intValue();
predictions.put(k, predictions.get(k) newval);
frequencies.put(k, frequencies.get(k) mFreqMatrix.get(k).get(j).intValue());
} catch(NullPointerException e) {}
}
}
HashMap<ItemId,Float> cleanpredictions = new HashMap<ItemId,Float>();
for (ItemId j : predictions.keySet()) {
if (frequencies.get(j)>0) {
cleanpredictions.put(j, predictions.get(j).floatValue()/frequencies.get(j).intValue());
}
}
for (ItemId j : user.keySet()) {
cleanpredictions.put(j,user.get(j));
}
return cleanpredictions;
}
public Map<ItemId,Float> weightlesspredict(Map<ItemId,Float> user) {
HashMap<ItemId,Float> predictions = new HashMap<ItemId,Float>();
HashMap<ItemId,Integer> frequencies = new HashMap<ItemId,Integer>();
for (ItemId j : mDiffMatrix.keySet()) {
predictions.put(j,0.0f);
frequencies.put(j,0);
}
for (ItemId j : user.keySet()) {
for (ItemId k : mDiffMatrix.keySet()) {
//System.out.println("Average diff between " j " and " k " is " mDiffMatrix.get(k).get(j).floatValue() " with n = " mFreqMatrix.get(k).get(j).floatValue());
float newval = ( mDiffMatrix.get(k).get(j).floatValue() user.get(j).floatValue() ) ;
predictions.put(k, predictions.get(k) newval);
}
}
for (ItemId j : predictions.keySet()) {
predictions.put(j, predictions.get(j).floatValue()/user.size());
}
for (ItemId j : user.keySet()) {
predictions.put(j,user.get(j));
}
return predictions;
}
public void printData() {
for(UserId user : mData.keySet()) {
System.out.println(user);
print(mData.get(user));
}
for (int i=0; i<mAllItems.length; i ) {
System.out.print("\n" mAllItems[i] ":");
printMatrixes(mDiffMatrix.get(mAllItems[i]), mFreqMatrix.get(mAllItems[i]));
}
}
private void printMatrixes(Map<ItemId,Float> ratings,
Map<ItemId,Integer> frequencies) {
for (int j=0; j<mAllItems.length; j ) {
System.out.format(".3f", ratings.get(mAllItems[j]));
System.out.print(" ");
System.out.format("d", frequencies.get(mAllItems[j]));
}
System.out.println();
}
public static void print(Map<ItemId,Float> user) {
for (ItemId j : user.keySet()) {
System.out.println(" " j " --> " user.get(j).floatValue());
}
}
public void buildDiffMatrix() {
mDiffMatrix = new HashMap<ItemId,Map<ItemId,Float>>();
mFreqMatrix = new HashMap<ItemId,Map<ItemId,Integer>>();
// first iterate through users
for(Map<ItemId,Float> user : mData.values()) {
// then iterate through user data
for(Map.Entry<ItemId,Float> entry: user.entrySet()) {
if(!mDiffMatrix.containsKey(entry.getKey())) {
mDiffMatrix.put(entry.getKey(), new HashMap<ItemId,Float>());
mFreqMatrix.put(entry.getKey(), new HashMap<ItemId,Integer>());
}
for(Map.Entry<ItemId,Float> entry2: user.entrySet()) {
int oldcount = 0;
if(mFreqMatrix.get(entry.getKey()).containsKey(entry2.getKey()))
oldcount = mFreqMatrix.get(entry.getKey()).get(entry2.getKey()).intValue();
float olddiff = 0.0f;
if(mDiffMatrix.get(entry.getKey()).containsKey(entry2.getKey()))
olddiff = mDiffMatrix.get(entry.getKey()).get(entry2.getKey()).floatValue();
float observeddiff = entry.getValue() - entry2.getValue();
mFreqMatrix.get(entry.getKey()).put(entry2.getKey(),oldcount 1);
mDiffMatrix.get(entry.getKey()).put(entry2.getKey(),olddiff observeddiff);
}
}
}
for (ItemId j : mDiffMatrix.keySet()) {
for (ItemId i : mDiffMatrix.get(j).keySet()) {
float oldvalue = mDiffMatrix.get(j).get(i).floatValue();
int count = mFreqMatrix.get(j).get(i).intValue();
mDiffMatrix.get(j).put(i,oldvalue/count);
}
}
}
}
class UserId {
String content;
public UserId(String s) {
content = s;
}
public int hashCode() { return content.hashCode();}
public String toString() { return content; }
}
class ItemId {
String content;
public ItemId(String s) {
content = s;
}
public int hashCode() { return content.hashCode();}
public String toString() { return content; }
}
C#实现:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace SlopeOne
{
/// <summary>
/// A running total together with the number of samples that went into it.
/// </summary>
public class Rating
{
    /// <summary>Accumulated total.</summary>
    public float Value { get; set; }

    /// <summary>Number of samples behind <see cref="Value"/>.</summary>
    public int Freq { get; set; }

    /// <summary>
    /// <see cref="Value"/> divided by <see cref="Freq"/>. The division is done
    /// in floating point, so a zero <see cref="Freq"/> does not throw.
    /// </summary>
    public float AverageValue
    {
        get
        {
            float total = Value;
            int samples = Freq;
            return total / samples;
        }
    }
}
/// <summary>
/// Dictionary of <see cref="Rating"/> accumulators addressed by an unordered
/// pair of item ids. Both orders of a pair map to the same entry, stored under
/// the string key "smallerId/largerId".
/// </summary>
public class RatingDifferenceCollection : Dictionary<string, Rating>
{
    // Builds the order-independent key "smallerId/largerId" for a pair of items.
    private string GetKey(int Item1Id, int Item2Id)
    {
        return (Item1Id < Item2Id) ? Item1Id + "/" + Item2Id : Item2Id + "/" + Item1Id;
    }

    /// <summary>Returns true when an entry exists for the pair, in either order.</summary>
    public bool Contains(int Item1Id, int Item2Id)
    {
        return this.ContainsKey(GetKey(Item1Id, Item2Id));
    }

    /// <summary>
    /// Gets or sets the entry for the pair, in either order. The getter throws
    /// <see cref="KeyNotFoundException"/> when the pair has no entry.
    /// </summary>
    public Rating this[int Item1Id, int Item2Id]
    {
        get
        {
            return this[this.GetKey(Item1Id, Item2Id)];
        }
        set { this[this.GetKey(Item1Id, Item2Id)] = value; }
    }
}
/// <summary>
/// Weighted Slope One predictor over integer item ids. Ratings are fed in one
/// user at a time; for each pair of items rated by the same user the class
/// accumulates the sum of rating differences and the number of such users.
/// </summary>
public class SlopeOne
{
    // For each item pair (lower id, higher id): Value is the sum over users of
    // (rating of lower id - rating of higher id), Freq is the number of users.
    public RatingDifferenceCollection _DiffMarix = new RatingDifferenceCollection(); // The dictionary to keep the diff matrix
    public HashSet<int> _Items = new HashSet<int>(); // Every item id seen so far

    /// <summary>
    /// Adds one user's ratings to the accumulated pairwise differences.
    /// </summary>
    /// <param name="userRatings">item id -> rating for a single user</param>
    public void AddUserRatings(IDictionary<int, float> userRatings)
    {
        foreach (var item1 in userRatings)
        {
            int item1Id = item1.Key;
            float item1Rating = item1.Value;
            _Items.Add(item1Id);
            foreach (var item2 in userRatings)
            {
                if (item2.Key <= item1Id) continue; // Each unordered pair is visited once, lower id first
                int item2Id = item2.Key;
                float item2Rating = item2.Value;
                Rating ratingDiff;
                if (_DiffMarix.Contains(item1Id, item2Id))
                {
                    ratingDiff = _DiffMarix[item1Id, item2Id];
                }
                else
                {
                    ratingDiff = new Rating();
                    _DiffMarix[item1Id, item2Id] = ratingDiff;
                }
                // Accumulate across users; overwriting would keep only the last user's difference.
                ratingDiff.Value += item1Rating - item2Rating;
                ratingDiff.Freq += 1;
            }
        }
    }

    /// <summary>Adds the ratings of several users, one dictionary per user.</summary>
    public void AddUerRatings(IList<IDictionary<int, float>> Ratings)
    {
        foreach (var userRatings in Ratings)
        {
            AddUserRatings(userRatings);
        }
    }

    /// <summary>
    /// Predicts ratings for the known items the user has not rated. Each of the
    /// user's ratings contributes (rating + average difference), weighted by
    /// the number of users who rated both items.
    /// </summary>
    /// <param name="userRatings">item id -> rating supplied by the querying user</param>
    /// <returns>
    /// item id -> predicted rating. Items the user already rated are not
    /// included, and neither are items that share no co-rating with any of the
    /// user's items.
    /// </returns>
    public IDictionary<int, float> Predict(IDictionary<int, float> userRatings)
    {
        Dictionary<int, float> Predictions = new Dictionary<int, float>();
        foreach (var itemId in this._Items)
        {
            if (userRatings.ContainsKey(itemId)) continue; // User has rated this item, just skip it
            Rating itemRating = new Rating();
            foreach (var userRating in userRatings)
            {
                int inputItemId = userRating.Key;
                if (!_DiffMarix.Contains(itemId, inputItemId)) continue;
                Rating diff = _DiffMarix[itemId, inputItemId];
                // Stored differences are (lower id - higher id); flip the sign
                // when the item being predicted is the higher id of the pair.
                int sign = (itemId < inputItemId) ? 1 : -1;
                itemRating.Value += diff.Freq * (userRating.Value + diff.AverageValue * sign);
                itemRating.Freq += diff.Freq;
            }
            // With no contributing pair the average would be 0/0, so the item is left out.
            if (itemRating.Freq > 0)
            {
                Predictions.Add(itemId, itemRating.AverageValue);
            }
        }
        return Predictions;
    }

    /// <summary>Demo: trains on three users and prints the predictions for a fourth.</summary>
    public static void Test()
    {
        SlopeOne test = new SlopeOne();
        Dictionary<int, float> userRating = new Dictionary<int, float>();
        userRating.Add(1, 5);
        userRating.Add(2, 4);
        userRating.Add(3, 4);
        test.AddUserRatings(userRating);
        userRating = new Dictionary<int, float>();
        userRating.Add(1, 4);
        userRating.Add(2, 5);
        userRating.Add(3, 3);
        userRating.Add(4, 5);
        test.AddUserRatings(userRating);
        userRating = new Dictionary<int, float>();
        userRating.Add(1, 4);
        userRating.Add(2, 4);
        userRating.Add(4, 5);
        test.AddUserRatings(userRating);
        userRating = new Dictionary<int, float>();
        userRating.Add(1, 5);
        userRating.Add(3, 4);
        IDictionary<int, float> Predictions = test.Predict(userRating);
        foreach (var rating in Predictions)
        {
            Console.WriteLine("Item " + rating.Key + " Rating: " + rating.Value);
        }
    }
}
}